2729 lines
86 KiB
C++
2729 lines
86 KiB
C++
|
|
// Redistribution and use in source and binary forms, with or without
|
||
|
|
// modification, are permitted provided that the following conditions
|
||
|
|
// are met:
|
||
|
|
// * Redistributions of source code must retain the above copyright
|
||
|
|
// notice, this list of conditions and the following disclaimer.
|
||
|
|
// * Redistributions in binary form must reproduce the above copyright
|
||
|
|
// notice, this list of conditions and the following disclaimer in the
|
||
|
|
// documentation and/or other materials provided with the distribution.
|
||
|
|
// * Neither the name of NVIDIA CORPORATION nor the names of its
|
||
|
|
// contributors may be used to endorse or promote products derived
|
||
|
|
// from this software without specific prior written permission.
|
||
|
|
//
|
||
|
|
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
|
||
|
|
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||
|
|
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||
|
|
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||
|
|
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||
|
|
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||
|
|
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||
|
|
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||
|
|
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||
|
|
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||
|
|
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||
|
|
//
|
||
|
|
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
|
||
|
|
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
|
||
|
|
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
|
||
|
|
|
||
|
|
#include "foundation/PxMemory.h"
|
||
|
|
#include "foundation/PxBitUtils.h"
|
||
|
|
#include "GuBucketPruner.h"
|
||
|
|
#include "GuInternal.h"
|
||
|
|
#include "CmVisualization.h"
|
||
|
|
#include "CmRadixSort.h"
|
||
|
|
|
||
|
|
using namespace physx;
|
||
|
|
using namespace aos;
|
||
|
|
using namespace Gu;
|
||
|
|
|
||
|
|
#define INVALID_HANDLE 0xffffffff
|
||
|
|
|
||
|
|
/*
|
||
|
|
TODO:
|
||
|
|
- if Core is always available, mSortedObjects could be replaced with just indices to mCoreObjects => less memory.
|
||
|
|
- UTS:
|
||
|
|
- test that queries against empty boxes all return false
|
||
|
|
- invalidate after 16 removes
|
||
|
|
- check shiftOrigin stuff (esp what happens to emptied boxes)
|
||
|
|
- isn't there a very hard-to-find bug waiting to happen in there,
|
||
|
|
when the shift touches the empty box and overrides mdata0/mdata1 with "wrong" values that break the sort?
|
||
|
|
- revisit updateObject/removeObject
|
||
|
|
- optimize/cache computation of free global bounds before clipRay
|
||
|
|
|
||
|
|
- remove temp memory buffers (sorted arrays)
|
||
|
|
- take care of code duplication
|
||
|
|
- better code to generate SIMD 0x7fffffff
|
||
|
|
- refactor SIMD tests
|
||
|
|
- optimize:
|
||
|
|
- better split values
|
||
|
|
- optimize update (bitmap, less data copy, etc)
|
||
|
|
- use ray limits in traversal code too?
|
||
|
|
- the SIMD XBOX code operates on Min/Max rather than C/E. Change format?
|
||
|
|
- or just try the alternative ray-box code (as on PC) ==> pretty much exactly the same speed
|
||
|
|
*/
|
||
|
|
|
||
|
|
//#define VERIFY_SORT
|
||
|
|
//#define BRUTE_FORCE_LIMIT 32
|
||
|
|
#define LOCAL_SIZE 256 // Size of various local arrays. Dynamic allocations occur if exceeded.
|
||
|
|
#define USE_SIMD // Use SIMD code or not (sanity performance check)
|
||
|
|
#define NODE_SORT // Enable/disable node sorting
|
||
|
|
#define NODE_SORT_MIN_COUNT 16 // Limit above which node sorting is performed
|
||
|
|
#if PX_INTEL_FAMILY
|
||
|
|
#if COMPILE_VECTOR_INTRINSICS
|
||
|
|
#define CAN_USE_MOVEMASK
|
||
|
|
#endif
|
||
|
|
#endif
|
||
|
|
|
||
|
|
#define ALIGN16(size) ((unsigned(size)+15) & unsigned(~15))
|
||
|
|
|
||
|
|
#ifdef _DEBUG
|
||
|
|
#define AlignedLoad V4LoadU
|
||
|
|
#define AlignedStore V4StoreU
|
||
|
|
#else
|
||
|
|
#define AlignedLoad V4LoadA
|
||
|
|
#define AlignedStore V4StoreA
|
||
|
|
#endif
|
||
|
|
|
||
|
|
// SAT-based ray-box overlap test has accuracy issues for long rays, so we clip them against the global AABB to limit these issues.
|
||
|
|
static void clipRay(const PxVec3& rayOrig, const PxVec3& rayDir, float& maxDist, const PxVec3& boxMin, const PxVec3& boxMax)
|
||
|
|
{
|
||
|
|
const PxVec3 boxCenter = (boxMax + boxMin)*0.5f;
|
||
|
|
const PxVec3 boxExtents = (boxMax - boxMin)*0.5f;
|
||
|
|
const float dpc = boxCenter.dot(rayDir);
|
||
|
|
const float extentsMagnitude = boxExtents.magnitude();
|
||
|
|
const float dpMin = dpc - extentsMagnitude;
|
||
|
|
const float dpMax = dpc + extentsMagnitude;
|
||
|
|
const float dpO = rayOrig.dot(rayDir);
|
||
|
|
const float boxLength = extentsMagnitude * 2.0f;
|
||
|
|
const float distToBox = PxMin(PxAbs(dpMin - dpO), PxAbs(dpMax - dpO));
|
||
|
|
maxDist = distToBox + boxLength * 2.0f;
|
||
|
|
}
|
||
|
|
|
||
|
|
// Default constructor: marks the five bucket boxes of this node as empty.
BucketPrunerNode::BucketPrunerNode()
{
	const PxU32 nbBuckets = 5;
	PxU32 bucketIndex = 0;
	while(bucketIndex<nbBuckets)
	{
		mBucketBox[bucketIndex].setEmpty();
		bucketIndex++;
	}
}
|
||
|
|
|
||
|
|
// Box classification table, mapping a 5-bit code to a bucket index in [0;4].
// Code layout, as built by classifyBox / classifyBox_x86:
//	bit 0 = rightPart, bit 1 = leftPart, bit 2 = lowerPart, bit 3 = upperPart, bit 4 = isCrossBucket
// The first 16 entries are used for regular buckets, the last 16 for the cross bucket.
static const PxU8 gCodes[] = {
	// isCrossBucket == false
	4, 4, 4, 4,		4, 3, 2, 2,
	4, 1, 0, 0,		4, 1, 0, 0,
	// isCrossBucket == true
	4, 1, 0, 0,		2, 1, 0, 0,
	3, 1, 0, 0,		2, 1, 0, 0
};
|
||
|
|
|
||
|
|
#ifdef CAN_USE_MOVEMASK
|
||
|
|
/*static PX_FORCE_INLINE PxU32 classifyBox_x86(const BucketBox& box, const PxVec4& limits, const bool useY, const bool isCrossBucket)
|
||
|
|
{
|
||
|
|
const Vec4V extents = AlignedLoad(&box.mExtents.x);
|
||
|
|
const Vec4V center = AlignedLoad(&box.mCenter.x);
|
||
|
|
const Vec4V plus = V4Add(extents, center);
|
||
|
|
const Vec4V minus = V4Sub(extents, center);
|
||
|
|
|
||
|
|
Vec4V tmp;
|
||
|
|
if(useY) // PT: this is a constant so branch prediction works here
|
||
|
|
tmp = _mm_shuffle_ps(plus, minus, _MM_SHUFFLE(0,1,0,1));
|
||
|
|
else
|
||
|
|
tmp = _mm_shuffle_ps(plus, minus, _MM_SHUFFLE(0,2,0,2));
|
||
|
|
|
||
|
|
const Vec4V comp = _mm_shuffle_ps(tmp, tmp, _MM_SHUFFLE(0,2,1,3)); // oh well, nm
|
||
|
|
|
||
|
|
const PxU32 Code = (PxU32)_mm_movemask_ps(V4IsGrtr(V4LoadA(&limits.x), comp));
|
||
|
|
return gCodes[Code | PxU32(isCrossBucket)<<4];
|
||
|
|
}*/
|
||
|
|
|
||
|
|
// SSE version of classifyBox: computes the four classification bits with a single compare + movemask.
//
// \param boxMin		box minimum
// \param boxMax		box maximum
// \param limits		split values, expected as (-limitX, limitX, -limitYZ, limitYZ) and 16-byte aligned (loaded with V4LoadA)
// \param useY			true to classify against Y (lane 1), false to classify against Z (lane 2)
// \param isCrossBucket	selects the second half of the gCodes table
// \return bucket index read from gCodes
static PX_FORCE_INLINE PxU32 classifyBox_x86(const Vec4V boxMin, const Vec4V boxMax, const PxVec4& limits, const bool useY, const bool isCrossBucket)
{
	// Negating the min lets a single "limits > value" compare handle both the min and max sides.
	const Vec4V negatedMin = V4Neg(boxMin);

	// _MM_SHUFFLE needs compile-time immediates, hence the two explicit shuffles.
	// PT: useY is a constant so branch prediction works here
	const Vec4V gathered = useY	? _mm_shuffle_ps(boxMax, negatedMin, _MM_SHUFFLE(0,1,0,1))		// (max.y, max.x, -min.y, -min.x)
								: _mm_shuffle_ps(boxMax, negatedMin, _MM_SHUFFLE(0,2,0,2));	// (max.z, max.x, -min.z, -min.x)

	// Reorder to (-min.x, max.x, -min.yz, max.yz), i.e. the same lane order as 'limits'.
	const Vec4V ordered = _mm_shuffle_ps(gathered, gathered, _MM_SHUFFLE(0,2,1,3));

	// One bit per lane: rightPart | leftPart<<1 | lowerPart<<2 | upperPart<<3
	const PxU32 classificationBits = PxU32(_mm_movemask_ps(V4IsGrtr(V4LoadA(&limits.x), ordered)));

	const PxU32 tableOffset = PxU32(isCrossBucket)<<4;
	return gCodes[classificationBits | tableOffset];
}
|
||
|
|
#endif
|
||
|
|
|
||
|
|
#ifdef CAN_USE_MOVEMASK
|
||
|
|
#if PX_DEBUG
|
||
|
|
#define USE_CLASSIFY_BOX
|
||
|
|
#endif
|
||
|
|
#else
|
||
|
|
#define USE_CLASSIFY_BOX
|
||
|
|
#endif
|
||
|
|
|
||
|
|
#ifdef USE_CLASSIFY_BOX
|
||
|
|
// Scalar box classification: finds in which of the 5 buckets a box falls, given two split values.
//
// \param box			box to classify (center/extents format)
// \param limitX		split value along X
// \param limitYZ		split value along the second axis
// \param yz			index of the second axis in the box vectors
// \param isCrossBucket	selects the second half of the gCodes table
// \return bucket index read from gCodes
static PX_FORCE_INLINE PxU32 classifyBox(const BucketBox& box, const float limitX, const float limitYZ, const PxU32 yz, const bool isCrossBucket)
{
	// Table-based box classification avoids many branches. Build the 4-bit code first.
	PxU32 code = 0;

	if((box.mCenter.x - box.mExtents.x)>limitX)			// rightPart
		code |= 1;
	if((box.mCenter.x + box.mExtents.x)<limitX)			// leftPart
		code |= 2;
	if((box.mCenter[yz] - box.mExtents[yz])>limitYZ)	// lowerPart
		code |= 4;
	if((box.mCenter[yz] + box.mExtents[yz])<limitYZ)	// upperPart
		code |= 8;

	// Cross buckets use the second half of the table.
	if(isCrossBucket)
		code += 16;

	return gCodes[code];
}
|
||
|
|
#endif
|
||
|
|
|
||
|
|
void BucketPrunerNode::classifyBoxes( float limitX, float limitYZ,
|
||
|
|
PxU32 nb, BucketBox* PX_RESTRICT boxes, const PrunerPayload* PX_RESTRICT objects,
|
||
|
|
const PxTransform* PX_RESTRICT transforms,
|
||
|
|
BucketBox* PX_RESTRICT sortedBoxes, PrunerPayload* PX_RESTRICT sortedObjects,
|
||
|
|
PxTransform* PX_RESTRICT sortedTransforms,
|
||
|
|
bool isCrossBucket, PxU32 sortAxis)
|
||
|
|
{
|
||
|
|
const PxU32 yz = PxU32(sortAxis == 1 ? 2 : 1);
|
||
|
|
|
||
|
|
#if PX_DEBUG
|
||
|
|
{
|
||
|
|
float prev = boxes[0].mDebugMin;
|
||
|
|
for(PxU32 i=1;i<nb;i++)
|
||
|
|
{
|
||
|
|
const float current = boxes[i].mDebugMin;
|
||
|
|
PX_ASSERT(current>=prev);
|
||
|
|
prev = current;
|
||
|
|
}
|
||
|
|
}
|
||
|
|
#endif
|
||
|
|
|
||
|
|
// Local (stack-based) min/max bucket bounds
|
||
|
|
PX_ALIGN(16, PxVec4) bucketBoxMin[5];
|
||
|
|
PX_ALIGN(16, PxVec4) bucketBoxMax[5];
|
||
|
|
{
|
||
|
|
const PxBounds3 empty = PxBounds3::empty();
|
||
|
|
for(PxU32 i=0;i<5;i++)
|
||
|
|
{
|
||
|
|
mCounters[i] = 0;
|
||
|
|
bucketBoxMin[i] = PxVec4(empty.minimum, 0.0f);
|
||
|
|
bucketBoxMax[i] = PxVec4(empty.maximum, 0.0f);
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
{
|
||
|
|
#ifdef CAN_USE_MOVEMASK
|
||
|
|
// DS: order doesn't play nice with x86 shuffles :-|
|
||
|
|
PX_ALIGN(16, PxVec4) limits(-limitX, limitX, -limitYZ, limitYZ);
|
||
|
|
const bool useY = yz==1;
|
||
|
|
#endif
|
||
|
|
// Determine in which bucket each object falls, update bucket bounds
|
||
|
|
for(PxU32 i=0;i<nb;i++)
|
||
|
|
{
|
||
|
|
const Vec4V boxCenterV = AlignedLoad(&boxes[i].mCenter.x);
|
||
|
|
const Vec4V boxExtentsV = AlignedLoad(&boxes[i].mExtents.x);
|
||
|
|
const Vec4V boxMinV = V4Sub(boxCenterV, boxExtentsV);
|
||
|
|
const Vec4V boxMaxV = V4Add(boxCenterV, boxExtentsV);
|
||
|
|
|
||
|
|
#ifdef CAN_USE_MOVEMASK
|
||
|
|
// const PxU32 index = classifyBox_x86(boxes[i], limits, useY, isCrossBucket);
|
||
|
|
const PxU32 index = classifyBox_x86(boxMinV, boxMaxV, limits, useY, isCrossBucket);
|
||
|
|
#if PX_DEBUG
|
||
|
|
const PxU32 index_ = classifyBox(boxes[i], limitX, limitYZ, yz, isCrossBucket);
|
||
|
|
PX_ASSERT(index == index_);
|
||
|
|
#endif
|
||
|
|
#else
|
||
|
|
const PxU32 index = classifyBox(boxes[i], limitX, limitYZ, yz, isCrossBucket);
|
||
|
|
#endif
|
||
|
|
// Merge boxes
|
||
|
|
{
|
||
|
|
const Vec4V mergedMinV = V4Min(V4LoadA(&bucketBoxMin[index].x), boxMinV);
|
||
|
|
const Vec4V mergedMaxV = V4Max(V4LoadA(&bucketBoxMax[index].x), boxMaxV);
|
||
|
|
V4StoreA(mergedMinV, &bucketBoxMin[index].x);
|
||
|
|
V4StoreA(mergedMaxV, &bucketBoxMax[index].x);
|
||
|
|
}
|
||
|
|
boxes[i].mData0 = index; // Store bucket index for current box in this temporary location
|
||
|
|
mCounters[index]++;
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
{
|
||
|
|
// Regenerate offsets
|
||
|
|
mOffsets[0]=0;
|
||
|
|
for(PxU32 i=0;i<4;i++)
|
||
|
|
mOffsets[i+1] = mOffsets[i] + mCounters[i];
|
||
|
|
}
|
||
|
|
|
||
|
|
{
|
||
|
|
// Group boxes with same bucket index together
|
||
|
|
for(PxU32 i=0;i<nb;i++)
|
||
|
|
{
|
||
|
|
const PxU32 bucketOffset = mOffsets[boxes[i].mData0]++; // Bucket index for current box was stored in mData0 by previous loop
|
||
|
|
// The 2 following lines are the same as:
|
||
|
|
// sortedBoxes[bucketOffset] = boxes[i];
|
||
|
|
AlignedStore(AlignedLoad(&boxes[i].mCenter.x), &sortedBoxes[bucketOffset].mCenter.x);
|
||
|
|
AlignedStore(AlignedLoad(&boxes[i].mExtents.x), &sortedBoxes[bucketOffset].mExtents.x);
|
||
|
|
|
||
|
|
#if PX_DEBUG
|
||
|
|
sortedBoxes[bucketOffset].mDebugMin = boxes[i].mDebugMin;
|
||
|
|
#endif
|
||
|
|
sortedObjects[bucketOffset] = objects[i];
|
||
|
|
sortedTransforms[bucketOffset] = transforms[i];
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
{
|
||
|
|
// Regenerate offsets
|
||
|
|
mOffsets[0]=0;
|
||
|
|
for(PxU32 i=0;i<4;i++)
|
||
|
|
mOffsets[i+1] = mOffsets[i] + mCounters[i];
|
||
|
|
}
|
||
|
|
|
||
|
|
{
|
||
|
|
// Convert local (stack-based) min/max bucket bounds to persistent center/extents format
|
||
|
|
const float Half = 0.5f;
|
||
|
|
const FloatV HalfV = FLoad(Half);
|
||
|
|
PX_ALIGN(16, PxVec4) bucketCenter;
|
||
|
|
PX_ALIGN(16, PxVec4) bucketExtents;
|
||
|
|
for(PxU32 i=0;i<5;i++)
|
||
|
|
{
|
||
|
|
// The following lines are the same as:
|
||
|
|
// mBucketBox[i].mCenter = bucketBox[i].getCenter();
|
||
|
|
// mBucketBox[i].mExtents = bucketBox[i].getExtents();
|
||
|
|
const Vec4V bucketBoxMinV = V4LoadA(&bucketBoxMin[i].x);
|
||
|
|
const Vec4V bucketBoxMaxV = V4LoadA(&bucketBoxMax[i].x);
|
||
|
|
const Vec4V bucketBoxCenterV = V4Scale(V4Add(bucketBoxMaxV, bucketBoxMinV), HalfV);
|
||
|
|
const Vec4V bucketBoxExtentsV = V4Scale(V4Sub(bucketBoxMaxV, bucketBoxMinV), HalfV);
|
||
|
|
V4StoreA(bucketBoxCenterV, &bucketCenter.x);
|
||
|
|
V4StoreA(bucketBoxExtentsV, &bucketExtents.x);
|
||
|
|
mBucketBox[i].mCenter = PxVec3(bucketCenter.x, bucketCenter.y, bucketCenter.z);
|
||
|
|
mBucketBox[i].mExtents = PxVec3(bucketExtents.x, bucketExtents.y, bucketExtents.z);
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
#if PX_DEBUG
|
||
|
|
for(PxU32 j=0;j<5;j++)
|
||
|
|
{
|
||
|
|
const PxU32 count = mCounters[j];
|
||
|
|
if(count)
|
||
|
|
{
|
||
|
|
const BucketBox* base = sortedBoxes + mOffsets[j];
|
||
|
|
float prev = base[0].mDebugMin;
|
||
|
|
for(PxU32 i=1;i<count;i++)
|
||
|
|
{
|
||
|
|
const float current = base[i].mDebugMin;
|
||
|
|
PX_ASSERT(current>=prev);
|
||
|
|
prev = current;
|
||
|
|
}
|
||
|
|
}
|
||
|
|
}
|
||
|
|
#endif
|
||
|
|
}
|
||
|
|
|
||
|
|
///////////////////////////////////////////////////////////////////////////////
|
||
|
|
|
||
|
|
// Classifies the content of each of the 5 buckets of a parent node into the corresponding child node.
// For each non-empty parent bucket, the bucket's slice of the 'base' arrays is classified into the
// temporary 'sorted...InBucket' buffers, then copied back in place. Children of empty buckets just
// get their counters initialized.
//
// \param nbAllocated				number of entries available in the temporary buffers (only used in an assert)
// \param sortedBoxesInBucket		temporary buffer receiving classified boxes
// \param sortedObjectsInBucket		temporary buffer receiving classified payloads
// \param sortedTransformsInBucket	temporary buffer receiving classified transforms
// \param bucket					parent node (counters, offsets and bucket boxes are read)
// \param childBucket				array of 5 child nodes, one per parent bucket
// \param baseBucketsBoxes			boxes of the parent node, reordered in place
// \param baseBucketsObjects		payloads of the parent node, reordered in place
// \param baseBucketTransforms		transforms of the parent node, reordered in place
// \param sortAxis					current sorting axis
static void processChildBuckets(PxU32 nbAllocated,
								BucketBox* sortedBoxesInBucket, PrunerPayload* sortedObjectsInBucket,
								PxTransform* sortedTransformsInBucket,
								const BucketPrunerNode& bucket, BucketPrunerNode* PX_RESTRICT childBucket,
								BucketBox* PX_RESTRICT baseBucketsBoxes, PrunerPayload* PX_RESTRICT baseBucketsObjects,
								PxTransform* baseBucketTransforms,
								PxU32 sortAxis)
{
	PX_UNUSED(nbAllocated);

	// Second classification axis: Z when sorting along Y, Y otherwise.
	const PxU32 secondAxis = PxU32(sortAxis == 1 ? 2 : 1);

	for(PxU32 bucketIndex=0; bucketIndex<5; bucketIndex++)
	{
		BucketPrunerNode& child = childBucket[bucketIndex];

		const PxU32 nbEntries = bucket.mCounters[bucketIndex];
		if(nbEntries)
		{
			// Slice of the parent arrays covered by this bucket
			BucketBox* sliceBoxes = baseBucketsBoxes + bucket.mOffsets[bucketIndex];
			PrunerPayload* sliceObjects = baseBucketsObjects + bucket.mOffsets[bucketIndex];
			PxTransform* sliceTransforms = baseBucketTransforms + bucket.mOffsets[bucketIndex];
			PX_ASSERT(nbEntries<=nbAllocated);

			// The center of the parent bucket's box provides the split values for the child
			const BucketBox& parentBox = bucket.mBucketBox[bucketIndex];
			const float splitX = parentBox.mCenter.x;
			const float splitYZ = parentBox.mCenter[secondAxis];

			// The last bucket (index 4) is the cross bucket
			const bool crossBucket = bucketIndex==4;

			child.classifyBoxes(splitX, splitYZ, nbEntries,
								sliceBoxes, sliceObjects, sliceTransforms,
								sortedBoxesInBucket, sortedObjectsInBucket, sortedTransformsInBucket,
								crossBucket, sortAxis);

			// Copy classified data back from the temporary buffers to the parent arrays
			PxMemCopy(sliceBoxes, sortedBoxesInBucket, sizeof(BucketBox)*nbEntries);
			PxMemCopy(sliceObjects, sortedObjectsInBucket, sizeof(PrunerPayload)*nbEntries);
			PxMemCopy(sliceTransforms, sortedTransformsInBucket, sizeof(PxTransform)*nbEntries);
		}
		else
		{
			// Nothing to classify in this bucket
			child.initCounters();
		}
	}
}
|
||
|
|
|
||
|
|
///////////////////////////////////////////////////////////////////////////////
|
||
|
|
|
||
|
|
// Remaps the binary representation of a float to an unsigned integer:
//	- values with the sign bit set have all their bits inverted (reverses the sequence of negative numbers),
//	- other values just get their sign bit set.
// Note: -0 and +0 may need special care, but it should make no practical difference.
//
// \param newPos	binary representation of the float to encode
// \return encoded value
static PX_FORCE_INLINE PxU32 encodeFloat(PxU32 newPos)
{
	const bool isNegative = (newPos & PX_SIGN_BITMASK)!=0;
	return isNegative ? ~newPos : (newPos | PX_SIGN_BITMASK);
}
|
||
|
|
|
||
|
|
// Computes the interval covered by a ray segment along the sorting axis.
//
// \param rayMin	[out] smallest coordinate reached by the segment along 'sortAxis'
// \param rayMax	[out] largest coordinate reached by the segment along 'sortAxis'
// \param rayOrig	ray origin
// \param rayDir	ray direction
// \param maxDist	ray length
// \param sortAxis	index of the sorting axis
static PX_FORCE_INLINE void computeRayLimits(float& rayMin, float& rayMax, const PxVec3& rayOrig, const PxVec3& rayDir, float maxDist, PxU32 sortAxis)
{
	// Coordinates of the two segment endpoints along the sorting axis
	const float startValue = rayOrig[sortAxis];
	const float delta = rayDir[sortAxis] * maxDist;
	const float endValue = startValue + delta;

	rayMin = PxMin(startValue, endValue);
	rayMax = PxMax(startValue, endValue);
}
|
||
|
|
|
||
|
|
// Computes the interval covered by an inflated ray segment along the sorting axis.
//
// \param rayMin	[out] smallest coordinate reached by the segment along 'sortAxis', minus the inflation
// \param rayMax	[out] largest coordinate reached by the segment along 'sortAxis', plus the inflation
// \param rayOrig	ray origin
// \param rayDir	ray direction
// \param maxDist	ray length
// \param inflate	per-axis inflation; only the 'sortAxis' component is used
// \param sortAxis	index of the sorting axis
static PX_FORCE_INLINE void computeRayLimits(float& rayMin, float& rayMax, const PxVec3& rayOrig, const PxVec3& rayDir, float maxDist, const PxVec3& inflate, PxU32 sortAxis)
{
	const float margin = inflate[sortAxis];

	// Coordinates of the two segment endpoints along the sorting axis
	const float startValue = rayOrig[sortAxis];
	const float delta = rayDir[sortAxis] * maxDist;
	const float endValue = startValue + delta;

	rayMin = PxMin(startValue, endValue) - margin;
	rayMax = PxMax(startValue, endValue) + margin;
}
|
||
|
|
|
||
|
|
// Stores the integer-encoded min & max of a box along the given axis in its mData0 / mData1 members.
//
// \param box	box to update (center/extents are read, mData0/mData1 are written)
// \param axis	index of the axis to encode
static PX_FORCE_INLINE void encodeBoxMinMax(BucketBox& box, const PxU32 axis)
{
	const float center = box.mCenter[axis];
	const float extent = box.mExtents[axis];

	const float min = center - extent;
	const float max = center + extent;

	// Reinterpret the floats' bits as integers, then encode them.
	const PxU32 minBits = *reinterpret_cast<const PxU32*>(&min);
	const PxU32 maxBits = *reinterpret_cast<const PxU32*>(&max);

	box.mData0 = encodeFloat(minBits);
	box.mData1 = encodeFloat(maxBits);
}
|
||
|
|
|
||
|
|
///////////////////////////////////////////////////////////////////////////////
|
||
|
|
|
||
|
|
// Creates an empty pruner core.
//
// \param externalMemory	true if the core arrays are provided by the user (see setExternalMemory),
//							false if the pruner allocates and owns them.
BucketPrunerCore::BucketPrunerCore(bool externalMemory) :
	mCoreNbObjects		(0),
	mCoreCapacity		(0),
	mCoreBoxes			(NULL),
	mCoreObjects		(NULL),
	mCoreTransforms		(NULL),
	mCoreRemap			(NULL),
	mSortedWorldBoxes	(NULL),
	mSortedObjects		(NULL),
	mSortedTransforms	(NULL),
#ifdef FREE_PRUNER_SIZE
	mNbFree				(0),
#endif
	mSortedNb			(0),
	mSortedCapacity		(0),
	mSortAxis			(0),
	mDirty				(false),
	mOwnMemory			(!externalMemory)
{
	mGlobalBox.setEmpty();

	// Initialize the counters of all nodes: 1 node at level 1, 5 at level 2, 5*5 at level 3.
	mLevel1.initCounters();

	for(PxU32 j=0;j<5;j++)
	{
		mLevel2[j].initCounters();

		for(PxU32 i=0;i<5;i++)
			mLevel3[j][i].initCounters();
	}
}
|
||
|
|
|
||
|
|
// Destructor: all cleanup is delegated to release().
BucketPrunerCore::~BucketPrunerCore()
{
	this->release();
}
|
||
|
|
|
||
|
|
void BucketPrunerCore::release()
|
||
|
|
{
|
||
|
|
mDirty = true;
|
||
|
|
mCoreNbObjects = 0;
|
||
|
|
|
||
|
|
mCoreCapacity = 0;
|
||
|
|
if(mOwnMemory)
|
||
|
|
{
|
||
|
|
PX_FREE(mCoreBoxes);
|
||
|
|
PX_FREE(mCoreObjects);
|
||
|
|
PX_FREE(mCoreTransforms);
|
||
|
|
PX_FREE(mCoreRemap);
|
||
|
|
}
|
||
|
|
|
||
|
|
PX_FREE(mSortedWorldBoxes);
|
||
|
|
PX_FREE(mSortedObjects);
|
||
|
|
PX_FREE(mSortedTransforms);
|
||
|
|
mSortedNb = 0;
|
||
|
|
mSortedCapacity = 0;
|
||
|
|
|
||
|
|
#ifdef FREE_PRUNER_SIZE
|
||
|
|
mNbFree = 0;
|
||
|
|
#endif
|
||
|
|
#ifdef USE_REGULAR_HASH_MAP
|
||
|
|
mMap.clear();
|
||
|
|
#else
|
||
|
|
mMap.purge();
|
||
|
|
#endif
|
||
|
|
}
|
||
|
|
|
||
|
|
// Makes the pruner use user-provided core arrays. Only valid when the pruner does not own its memory.
// The pointers are stored as-is (no copy) and the remap table is reset to NULL.
//
// \param nbObjects		number of objects in the provided arrays
// \param boxes			object bounds
// \param objects		object payloads
// \param transforms	object transforms
void BucketPrunerCore::setExternalMemory(PxU32 nbObjects, PxBounds3* boxes, PrunerPayload* objects, PxTransform* transforms)
{
	PX_ASSERT(!mOwnMemory);

	mCoreBoxes		= boxes;
	mCoreObjects	= objects;
	mCoreTransforms	= transforms;
	mCoreRemap		= NULL;

	mCoreNbObjects	= nbObjects;
}
|
||
|
|
|
||
|
|
// Makes sure the sorted arrays can hold 'nb' entries, and records 'nb' as the number of sorted objects.
// Existing buffers are kept when 'nb' fits in the current capacity and uses at least half of it.
// Otherwise they are freed and reallocated with a capacity of PxNextPowerOfTwo(nb); previous content is not preserved.
//
// \param nb	number of entries needed in the sorted arrays
void BucketPrunerCore::allocateSortedMemory(PxU32 nb)
{
	mSortedNb = nb;

	// Keep current buffers if they are large enough, but not more than twice too large
	const bool fitsInCurrentBuffers = nb<=mSortedCapacity;
	const bool usesAtLeastHalf = nb>=mSortedCapacity/2;
	if(fitsInCurrentBuffers && usesAtLeastHalf)
		return;

	const PxU32 newCapacity = PxNextPowerOfTwo(nb);
	mSortedCapacity = newCapacity;

	// Buffer sizes are rounded up to a multiple of 16 bytes
	const PxU32 boxesSize = ALIGN16(newCapacity*sizeof(BucketBox));
	const PxU32 objectsSize = ALIGN16(newCapacity*sizeof(PrunerPayload));
	// PT: TODO: I don't remember what this alignment is for, maybe we don't need it
	const PxU32 transformsSize = ALIGN16(newCapacity*sizeof(PxTransform));

	// Release previous buffers...
	PX_FREE(mSortedWorldBoxes);
	PX_FREE(mSortedObjects);
	PX_FREE(mSortedTransforms);

	// ...and allocate new ones
	mSortedWorldBoxes	= reinterpret_cast<BucketBox*>(PX_ALLOC(boxesSize, "BucketPruner"));
	mSortedObjects		= reinterpret_cast<PrunerPayload*>(PX_ALLOC(objectsSize, "BucketPruner"));
	mSortedTransforms	= reinterpret_cast<PxTransform*>(PX_ALLOC(transformsSize, "BucketPruner"));

	// The buffers are expected to be 16-byte aligned
	PX_ASSERT((size_t(mSortedWorldBoxes)&15)==0);
	PX_ASSERT((size_t(mSortedObjects)&15)==0);
	PX_ASSERT((size_t(mSortedTransforms)&15)==0);
}
|
||
|
|
|
||
|
|
///////////////////////////////////////////////////////////////////////////////
|
||
|
|
|
||
|
|
void BucketPrunerCore::resizeCore()
|
||
|
|
{
|
||
|
|
const PxU32 capacity = mCoreCapacity ? mCoreCapacity*2 : 32;
|
||
|
|
mCoreCapacity = capacity;
|
||
|
|
|
||
|
|
const PxU32 bytesNeededForBoxes = capacity*sizeof(PxBounds3);
|
||
|
|
const PxU32 bytesNeededForObjects = capacity*sizeof(PrunerPayload);
|
||
|
|
const PxU32 bytesNeededForTransforms = capacity*sizeof(PxTransform);
|
||
|
|
const PxU32 bytesNeededForRemap = capacity*sizeof(PxU32);
|
||
|
|
|
||
|
|
PxBounds3* newCoreBoxes = reinterpret_cast<PxBounds3*>(PX_ALLOC(bytesNeededForBoxes, "BucketPruner"));
|
||
|
|
PrunerPayload* newCoreObjects = reinterpret_cast<PrunerPayload*>(PX_ALLOC(bytesNeededForObjects, "BucketPruner"));
|
||
|
|
PxTransform* newCoreTransforms = reinterpret_cast<PxTransform*>(PX_ALLOC(bytesNeededForTransforms, "BucketPruner"));
|
||
|
|
PxU32* newCoreRemap = reinterpret_cast<PxU32*>(PX_ALLOC(bytesNeededForRemap, "BucketPruner"));
|
||
|
|
if(mCoreBoxes)
|
||
|
|
{
|
||
|
|
PxMemCopy(newCoreBoxes, mCoreBoxes, mCoreNbObjects*sizeof(PxBounds3));
|
||
|
|
PX_FREE(mCoreBoxes);
|
||
|
|
}
|
||
|
|
if(mCoreObjects)
|
||
|
|
{
|
||
|
|
PxMemCopy(newCoreObjects, mCoreObjects, mCoreNbObjects*sizeof(PrunerPayload));
|
||
|
|
PX_FREE(mCoreObjects);
|
||
|
|
}
|
||
|
|
if(mCoreTransforms)
|
||
|
|
{
|
||
|
|
PxMemCopy(newCoreTransforms, mCoreTransforms, mCoreNbObjects*sizeof(PxTransform));
|
||
|
|
PX_FREE(mCoreTransforms);
|
||
|
|
}
|
||
|
|
if(mCoreRemap)
|
||
|
|
{
|
||
|
|
PxMemCopy(newCoreRemap, mCoreRemap, mCoreNbObjects*sizeof(PxU32));
|
||
|
|
PX_FREE(mCoreRemap);
|
||
|
|
}
|
||
|
|
mCoreBoxes = newCoreBoxes;
|
||
|
|
mCoreObjects = newCoreObjects;
|
||
|
|
mCoreTransforms = newCoreTransforms;
|
||
|
|
mCoreRemap = newCoreRemap;
|
||
|
|
}
|
||
|
|
|
||
|
|
// Appends an object to the core arrays (growing them if needed) and registers it in the map.
// The object's remap entry is set to 0xffffffff (INVALID_HANDLE).
//
// \param object	object payload
// \param worldAABB	object bounds
// \param transform	object transform
// \param timeStamp	timestamp stored in the map along with the object's core index
PX_FORCE_INLINE void BucketPrunerCore::addObjectInternal(const PrunerPayload& object, const PxBounds3& worldAABB, const PxTransform& transform, PxU32 timeStamp)
{
	// Make room for the new entry if the core arrays are full
	if(mCoreNbObjects==mCoreCapacity)
		resizeCore();

	const PxU32 coreIndex = mCoreNbObjects;
	mCoreNbObjects = coreIndex + 1;

	mCoreObjects[coreIndex]		= object;
	mCoreBoxes[coreIndex]		= worldAABB;	// PT: TODO: check assembly here
	mCoreTransforms[coreIndex]	= transform;	// PT: TODO: check assembly here
	mCoreRemap[coreIndex]		= INVALID_HANDLE;

	// Objects are only inserted into the map once they're part of the main/core arrays.
#ifdef USE_REGULAR_HASH_MAP
	bool inserted = mMap.insert(object, BucketPrunerPair(coreIndex, timeStamp));
#else
	BucketPrunerPair* inserted = mMap.addPair(object, coreIndex, timeStamp);
#endif
	PX_UNUSED(inserted);
	PX_ASSERT(inserted);
}
|
||
|
|
|
||
|
|
bool BucketPrunerCore::addObject(const PrunerPayload& object, const PxBounds3& worldAABB, const PxTransform& transform, PxU32 timeStamp)
|
||
|
|
{
|
||
|
|
/*
|
||
|
|
We should probably use a bigger payload/userData struct here, which would also contains the external handle.
|
||
|
|
(EDIT: we can't even do that, because of the setExternalMemory function)
|
||
|
|
When asked to update/remove an object it would be O(n) to find the proper object in the mSortedObjects array.
|
||
|
|
|
||
|
|
-
|
||
|
|
|
||
|
|
For removing it we can simply empty the corresponding box, and the object will never be returned from queries.
|
||
|
|
Maybe this isn't even true, since boxes are sorted along one axis. So marking a box as empty could break the code relying on a sorted order.
|
||
|
|
An alternative is to mark the external handle as invalid, and ignore the object when a hit is found.
|
||
|
|
|
||
|
|
(EDIT: the sorting is now tested via data0/data1 anyway so we could mark the box as empty without breaking this)
|
||
|
|
|
||
|
|
-
|
||
|
|
|
||
|
|
For updating an object we would need to keep the (sub) array sorted (not the whole thing, only the array within a bucket).
|
||
|
|
We don't know the range (what part of the array maps to our bucket) but we may have the bucket ID somewhere? If we'd have this
|
||
|
|
we could parse the array left/right and resort just the right boxes. If we don't have this we may be able to "quickly" find the
|
||
|
|
range by traversing the tree, looking for the proper bucket. In any case I don't think there's a mapping to update within a bucket,
|
||
|
|
unlike in SAP or MBP. So we should be able to shuffle a bucket without having to update anything. For example there's no mapping
|
||
|
|
between the Core array and the Sorted array. It's a shame in a way because we'd need one, but it's not there - and in fact I think
|
||
|
|
we can free the Core array once Sorted is created, we don't need it at all.
|
||
|
|
|
||
|
|
If we don't want to re-sort the full bucket we can just mark it as dirty and ignore the sort-based early exits in the queries. Then we
|
||
|
|
can incrementally resort it over N frames or something.
|
||
|
|
|
||
|
|
This only works if the updated object remains in the same bucket though. If it moves to another bucket it becomes tempting to just remove
|
||
|
|
the object and re-insert it.
|
||
|
|
|
||
|
|
-
|
||
|
|
|
||
|
|
Now for adding an object, we can first have a "free pruner" and do the 16 next entries brute-force. Rebuilding every 16 objects might
|
||
|
|
give a good speedup already. Otherwise we need to do something more complicated.
|
||
|
|
*/
|
||
|
|
|
||
|
|
PX_ASSERT(mOwnMemory);
|
||
|
|
PX_ASSERT(!mDirty || !mNbFree);
|
||
|
|
if(!mDirty)
|
||
|
|
{
|
||
|
|
#ifdef FREE_PRUNER_SIZE
|
||
|
|
// In this path the structure is marked as valid. We do not want to invalidate it for each new object...
|
||
|
|
if(mNbFree<FREE_PRUNER_SIZE)
|
||
|
|
{
|
||
|
|
// ...so as long as there is space in the "free array", we store the newly added object there and
|
||
|
|
// return immediately. Subsequent queries will parse the free array as if it was a free pruner.
|
||
|
|
const PxU32 index = mNbFree++;
|
||
|
|
mFreeObjects[index] = object;
|
||
|
|
mFreeBounds[index] = worldAABB;
|
||
|
|
mFreeTransforms[index] = transform;
|
||
|
|
mFreeStamps[index] = timeStamp;
|
||
|
|
return true;
|
||
|
|
}
|
||
|
|
|
||
|
|
// If we reach this place, the free array is full. We must transfer the objects from the free array to
|
||
|
|
// the main (core) arrays, mark the structure as invalid, and still deal with the incoming object.
|
||
|
|
|
||
|
|
// First we transfer free objects, reset the number of free objects, and mark the structure as
|
||
|
|
// invalid/dirty (the core arrays will need rebuilding).
|
||
|
|
for(PxU32 i=0;i<mNbFree;i++)
|
||
|
|
addObjectInternal(mFreeObjects[i], mFreeBounds[i], mFreeTransforms[i], mFreeStamps[i]);
|
||
|
|
|
||
|
|
mNbFree = 0;
|
||
|
|
#endif
|
||
|
|
mDirty = true;
|
||
|
|
// mSortedNb = 0; // PT: TODO: investigate if this should be done here
|
||
|
|
|
||
|
|
// After that we still need to deal with the new incoming object (so far we only
|
||
|
|
// transferred the already existing objects from the full free array). This will
|
||
|
|
// happen automatically by letting the code continue to the regular codepath below.
|
||
|
|
}
|
||
|
|
|
||
|
|
// If we reach this place, the structure must be invalid and the incoming object
|
||
|
|
// must be added to the main arrays.
|
||
|
|
PX_ASSERT(mDirty);
|
||
|
|
|
||
|
|
addObjectInternal(object, worldAABB, transform, timeStamp);
|
||
|
|
return true;
|
||
|
|
}
|
||
|
|
|
||
|
|
bool BucketPrunerCore::removeObject(const PrunerPayload& object, PxU32& timeStamp)
|
||
|
|
{
|
||
|
|
// Even if the structure is already marked as dirty, we still need to update the
|
||
|
|
// core arrays and the map.
|
||
|
|
|
||
|
|
// The map only contains core objects, so we can use it to determine if the object
|
||
|
|
// exists in the core arrays or in the free array.
|
||
|
|
#ifdef USE_REGULAR_HASH_MAP
|
||
|
|
/* BucketPrunerPair entry;
|
||
|
|
if(mMap.findAndErase(object, entry))
|
||
|
|
{
|
||
|
|
PxU32 coreIndex = entry.mCoreIndex;
|
||
|
|
timeStamp = entry.mTimeStamp;*/
|
||
|
|
const BucketPrunerMap::Entry* removedEntry = mMap.find(object);
|
||
|
|
if(removedEntry)
|
||
|
|
{
|
||
|
|
PxU32 coreIndex = removedEntry->second.mCoreIndex;
|
||
|
|
timeStamp = removedEntry->second.mTimeStamp;
|
||
|
|
#else
|
||
|
|
PxU32 coreIndex; // This is the object's index in the core arrays.
|
||
|
|
if(mMap.removePair(object, coreIndex, timeStamp))
|
||
|
|
{
|
||
|
|
#endif
|
||
|
|
// In this codepath, the object we want to remove exists in the core arrays.
|
||
|
|
|
||
|
|
// We will need to remove it from both the core arrays & the sorted arrays.
|
||
|
|
const PxU32 sortedIndex = mCoreRemap[coreIndex]; // This is the object's index in the sorted arrays.
|
||
|
|
|
||
|
|
#ifdef USE_REGULAR_HASH_MAP
|
||
|
|
bool status = mMap.erase(object);
|
||
|
|
PX_ASSERT(status);
|
||
|
|
PX_UNUSED(status);
|
||
|
|
#endif
|
||
|
|
|
||
|
|
// First let's deal with the core arrays
|
||
|
|
mCoreNbObjects--;
|
||
|
|
if(coreIndex!=mCoreNbObjects)
|
||
|
|
{
|
||
|
|
// If it wasn't the last object in the array, close the gaps as usual
|
||
|
|
const PrunerPayload& movedObject = mCoreObjects[mCoreNbObjects];
|
||
|
|
mCoreBoxes[coreIndex] = mCoreBoxes[mCoreNbObjects];
|
||
|
|
mCoreTransforms[coreIndex] = mCoreTransforms[mCoreNbObjects];
|
||
|
|
mCoreObjects[coreIndex] = movedObject;
|
||
|
|
mCoreRemap[coreIndex] = mCoreRemap[mCoreNbObjects];
|
||
|
|
|
||
|
|
// Since we just moved the last object, its index in the core arrays has changed.
|
||
|
|
// We must reflect this change in the map.
|
||
|
|
#ifdef USE_REGULAR_HASH_MAP
|
||
|
|
BucketPrunerMap::Entry* movedEntry = const_cast<BucketPrunerMap::Entry*>(mMap.find(movedObject));
|
||
|
|
PX_ASSERT(movedEntry->second.mCoreIndex==mCoreNbObjects);
|
||
|
|
movedEntry->second.mCoreIndex = coreIndex;
|
||
|
|
#else
|
||
|
|
BucketPrunerPair* movedEntry = const_cast<BucketPrunerPair*>(mMap.findPair(movedObject));
|
||
|
|
PX_ASSERT(movedEntry->mCoreIndex==mCoreNbObjects);
|
||
|
|
movedEntry->mCoreIndex = coreIndex;
|
||
|
|
#endif
|
||
|
|
}
|
||
|
|
|
||
|
|
// Now, let's deal with the sorted arrays.
|
||
|
|
// If the structure is dirty, the sorted arrays will be rebuilt from scratch so there's no need to
|
||
|
|
// update them right now.
|
||
|
|
if(!mDirty)
|
||
|
|
{
|
||
|
|
// If the structure is valid, we want to keep it this way to avoid rebuilding sorted arrays after
|
||
|
|
// each removal. We can't "close the gaps" easily here because order of objects in the arrays matters.
|
||
|
|
|
||
|
|
// Instead we just invalidate the object by setting its bounding box as empty.
|
||
|
|
// Queries against empty boxes will never return a hit, so this effectively "removes" the object
|
||
|
|
// from any subsequent query results. Sorted arrays now contain a "disabled" object, until next build.
|
||
|
|
|
||
|
|
// Invalidating the box does not invalidate the sorting, since it's now captured in mData0/mData1.
|
||
|
|
// That is, mData0/mData1 keep their previous integer-encoded values, as if the box/object was still here.
|
||
|
|
mSortedWorldBoxes[sortedIndex].mCenter = PxVec3(0.0f);
|
||
|
|
mSortedWorldBoxes[sortedIndex].mExtents = PxVec3(-GU_EMPTY_BOUNDS_EXTENTS);
|
||
|
|
// Note that we don't touch mSortedObjects here. We could, but this is not necessary.
|
||
|
|
}
|
||
|
|
return true;
|
||
|
|
}
|
||
|
|
|
||
|
|
#ifdef FREE_PRUNER_SIZE
|
||
|
|
// Here, the object we want to remove exists in the free array. So we just parse it.
|
||
|
|
for(PxU32 i=0;i<mNbFree;i++)
|
||
|
|
{
|
||
|
|
if(mFreeObjects[i]==object)
|
||
|
|
{
|
||
|
|
// We found the object we want to remove. Close the gap as usual.
|
||
|
|
timeStamp = mFreeStamps[i];
|
||
|
|
mNbFree--;
|
||
|
|
mFreeBounds[i] = mFreeBounds[mNbFree];
|
||
|
|
mFreeTransforms[i] = mFreeTransforms[mNbFree];
|
||
|
|
mFreeObjects[i] = mFreeObjects[mNbFree];
|
||
|
|
mFreeStamps[i] = mFreeStamps[mNbFree];
|
||
|
|
return true;
|
||
|
|
}
|
||
|
|
}
|
||
|
|
#endif
|
||
|
|
// We didn't find the object. Can happen with a double remove. PX_ASSERT might be an option here.
|
||
|
|
return false;
|
||
|
|
}
|
||
|
|
|
||
|
|
bool BucketPrunerCore::updateObject(const PxBounds3& worldAABB, const PrunerPayload& object, const PxTransform& transform)
|
||
|
|
{
|
||
|
|
PxU32 timeStamp;
|
||
|
|
if(!removeObject(object, timeStamp))
|
||
|
|
return false;
|
||
|
|
|
||
|
|
return addObject(object, worldAABB, transform, timeStamp);
|
||
|
|
}
|
||
|
|
|
||
|
|
// Removes all objects whose timestamp equals 'timeStamp'.
// Objects are looked up first in the hash-map (core arrays), then, when FREE_PRUNER_SIZE is
// defined, in the 'free' array.
// \param timeStamp	timestamp value marking the objects to remove
// \return number of objects removed
PxU32 BucketPrunerCore::removeMarkedObjects(PxU32 timeStamp)
{
	PxU32 removedCount = 0;

	// PT: objects can be either in the hash-map, or in the 'free' array. First we look in the hash-map...
#ifdef USE_REGULAR_HASH_MAP
	PxU32 pairCount = mMap.size();
#else
	PxU32 pairCount = mMap.mNbActivePairs;
#endif
	if(pairCount)
	{
		// Center/extents of an empty PxBounds3. Writing them into a sorted box disables that entry.
		PxBounds3 emptyBounds;
		emptyBounds.setEmpty();
		const PxVec3 disabledCenter = emptyBounds.getCenter();
		const PxVec3 disabledExtents = emptyBounds.getExtents();

#ifdef USE_REGULAR_HASH_MAP
		const BucketPrunerMap::Entry* entries = mMap.mBase.getEntries();
#endif
		PxU32 liveCoreCount = mCoreNbObjects;	// PT: to avoid LHS

		// PT: hash-map is coalesced so we just parse it in linear order, no holes.
		// The cursor only advances when the current entry is kept: after a removal the same slot
		// is examined again while the number of entries to visit shrinks by one.
		PxU32 cursor = 0;
		while(cursor<pairCount)
		{
#ifdef USE_REGULAR_HASH_MAP
			const BucketPrunerMap::Entry& current = entries[cursor];
			const bool isMarked = current.second.mTimeStamp==timeStamp;
#else
			const BucketPrunerPair& current = mMap.mActivePairs[cursor];
			const bool isMarked = current.mTimeStamp==timeStamp;
#endif
			if(!isMarked)
			{
				cursor++;
				continue;
			}

			// PT: timestamps match. We must remove this object.
			// PT: we replicate here what we do in BucketPrunerCore::removeObject(). See that function for details.
#ifdef USE_REGULAR_HASH_MAP
			const PxU32 coreIndex = current.second.mCoreIndex;
#else
			const PxU32 coreIndex = current.mCoreIndex;
#endif

			// This must happen before the core arrays are compacted below, since compaction
			// overwrites mCoreRemap[coreIndex].
			if(!mDirty)
			{
				// PT: invalidating the box does not invalidate the sorting, since it's now captured in mData0/mData1
				const PxU32 sortedIndex = mCoreRemap[coreIndex];
				mSortedWorldBoxes[sortedIndex].mCenter = disabledCenter;
				mSortedWorldBoxes[sortedIndex].mExtents = disabledExtents;
			}

			// Close the gap in the core arrays by moving the last object into the freed slot.
			liveCoreCount--;
			if(coreIndex!=liveCoreCount)
			{
				const PrunerPayload& movedObject = mCoreObjects[liveCoreCount];
				mCoreBoxes[coreIndex] = mCoreBoxes[liveCoreCount];
				mCoreTransforms[coreIndex] = mCoreTransforms[liveCoreCount];
				mCoreObjects[coreIndex] = movedObject;
				mCoreRemap[coreIndex] = mCoreRemap[liveCoreCount];

				// The moved object now lives at 'coreIndex': reflect this in its map entry.
#ifdef USE_REGULAR_HASH_MAP
				BucketPrunerMap::Entry* movedEntry = const_cast<BucketPrunerMap::Entry*>(mMap.find(movedObject));
				PX_ASSERT(movedEntry->second.mCoreIndex==liveCoreCount);
				movedEntry->second.mCoreIndex = coreIndex;
#else
				BucketPrunerPair* movedEntry = const_cast<BucketPrunerPair*>(mMap.findPair(movedObject));
				PX_ASSERT(movedEntry->mCoreIndex==liveCoreCount);
				movedEntry->mCoreIndex = coreIndex;
#endif
			}

			removedCount++;

			// Finally drop the entry from the map itself.
#ifdef USE_REGULAR_HASH_MAP
			bool status = mMap.erase(current.first);
			PX_ASSERT(status);
			PX_UNUSED(status);
#else
			const PxU32 hashValue = PxComputeHash(current.mData) & mMap.mMask;
			mMap.removePairInternal(current.mData, hashValue, cursor);
#endif
			pairCount--;
		}
		mCoreNbObjects = liveCoreCount;

#ifndef USE_REGULAR_HASH_MAP
		mMap.shrinkMemory();
#endif
	}

#ifdef FREE_PRUNER_SIZE
	// PT: ...then we look in the 'free' array
	for(PxU32 i=0;i<mNbFree;)
	{
		if(mFreeStamps[i]!=timeStamp)
		{
			i++;
			continue;
		}

		// Overwrite the removed slot with the last free object, then re-test the same slot.
		removedCount++;
		mNbFree--;
		mFreeBounds[i] = mFreeBounds[mNbFree];
		mFreeTransforms[i] = mFreeTransforms[mNbFree];
		mFreeObjects[i] = mFreeObjects[mNbFree];
		mFreeStamps[i] = mFreeStamps[mNbFree];
	}
#endif
	return removedCount;
}
|
||
|
|
|
||
|
|
///////////////////////////////////////////////////////////////////////////////
|
||
|
|
|
||
|
|
// Computes the box enclosing all input boxes, picks a sort axis, and outputs the input
// boxes/objects/transforms reordered by increasing box minimum along that axis.
// \param nb				number of input boxes (must be > 0)
// \param boxes				input min/max boxes
// \param objects			input payloads, parallel to 'boxes'
// \param transforms		input transforms, parallel to 'boxes'
// \param _globalBox		[out] center/extents of the merged input boxes
// \param sortedBoxes		[out] reordered boxes, stored as center/extents
// \param sortedObjects		[out] reordered payloads (also used as scratch memory for the sort keys)
// \param sortedTransforms	[out] reordered transforms
// \return the sort axis: 1 when the merged box's Y extent is smaller than its Z extent, 2 otherwise
static PxU32 sortBoxes(	PxU32 nb, const PxBounds3* PX_RESTRICT boxes, const PrunerPayload* PX_RESTRICT objects,
						const PxTransform* PX_RESTRICT transforms,
						BucketBox& _globalBox, BucketBox* PX_RESTRICT sortedBoxes, PrunerPayload* PX_RESTRICT sortedObjects
						, PxTransform* PX_RESTRICT sortedTransforms)
{
	const FloatV halfV = FLoad(0.5f);

	// Compute global box & sort axis
	PX_ASSERT(nb>0);
	const PxU32 lastIndex = nb-1;

	// The merge is seeded with the last box, whose maximum is read with a 3-float load.
	// NOTE(review): presumably this is because a 4-float load of the last box's maximum would
	// read past the end of the array - confirm against PxBounds3's layout.
	Vec4V globalMinV = V4LoadU(&boxes[lastIndex].minimum.x);
	Vec4V globalMaxV = Vec4V_From_Vec3V(V3LoadU(&boxes[lastIndex].maximum.x));
	for(PxU32 i=0;i<lastIndex;i++)
	{
		globalMinV = V4Min(globalMinV, V4LoadU(&boxes[i].minimum.x));
		globalMaxV = V4Max(globalMaxV, V4LoadU(&boxes[i].maximum.x));
	}

	// center = (max + min) * 0.5, extents = (max - min) * 0.5
	PX_ALIGN(16, PxVec4) globalCenter;
	PX_ALIGN(16, PxVec4) globalExtents;
	V4StoreA(V4Scale(V4Add(globalMaxV, globalMinV), halfV), &globalCenter.x);
	V4StoreA(V4Scale(V4Sub(globalMaxV, globalMinV), halfV), &globalExtents.x);
	_globalBox.mCenter = PxVec3(globalCenter.x, globalCenter.y, globalCenter.z);
	_globalBox.mExtents = PxVec3(globalExtents.x, globalExtents.y, globalExtents.z);

	const PxF32 extentY = PxAbs(_globalBox.mExtents.y);
	const PxF32 extentZ = PxAbs(_globalBox.mExtents.z);
	const PxU32 sortAxis = PxU32(extentY < extentZ ? 1 : 2);
//	printf("Sort axis: %d\n", sortAxis);

	// The output payload buffer doubles as temporary storage for the float sort keys. It is only
	// overwritten with real payloads in the loop below, after the sort has produced its ranks.
	float* sortKeys = reinterpret_cast<float*>(sortedObjects);
	for(PxU32 i=0;i<nb;i++)
		sortKeys[i] = boxes[i].minimum[sortAxis];

	Cm::RadixSortBuffered radixSorter;	// ###TODO: some allocs here, remove
	const PxU32* ranks = radixSorter.Sort(sortKeys, nb).GetRanks();

	for(PxU32 i=0;i<nb;i++)
	{
		// ranks[i] is the index of the input box that ends up at sorted position i
		const PxU32 srcIndex = ranks[i];

		const Vec4V boxMinV = V4LoadU(&boxes[srcIndex].minimum.x);
		const Vec4V boxMaxV = Vec4V_From_Vec3V(V3LoadU(&boxes[srcIndex].maximum.x));
		const Vec4V boxCenterV = V4Scale(V4Add(boxMaxV, boxMinV), halfV);
		const Vec4V boxExtentsV = V4Scale(V4Sub(boxMaxV, boxMinV), halfV);

		// We don't need to preserve data0/data1 here
		AlignedStore(boxCenterV, &sortedBoxes[i].mCenter.x);
		AlignedStore(boxExtentsV, &sortedBoxes[i].mExtents.x);

#if PX_DEBUG
		sortedBoxes[i].mDebugMin = boxes[srcIndex].minimum[sortAxis];
#endif
		sortedObjects[i] = objects[srcIndex];
		sortedTransforms[i] = transforms[srcIndex];
	}

	return sortAxis;
}
|
||
|
|
|
||
|
|
#ifdef NODE_SORT
|
||
|
|
template<class T>
|
||
|
|
PX_CUDA_CALLABLE PX_FORCE_INLINE void tswap(T& x, T& y)
|
||
|
|
{
|
||
|
|
T tmp = x;
|
||
|
|
x = y;
|
||
|
|
y = tmp;
|
||
|
|
}
|
||
|
|
|
||
|
|
/* PX_FORCE_INLINE __m128 DotV(const __m128 a, const __m128 b)
|
||
|
|
{
|
||
|
|
const __m128 dot1 = _mm_mul_ps(a, b);
|
||
|
|
const __m128 shuf1 = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(dot1), _MM_SHUFFLE(0,0,0,0)));
|
||
|
|
const __m128 shuf2 = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(dot1), _MM_SHUFFLE(1,1,1,1)));
|
||
|
|
const __m128 shuf3 = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(dot1), _MM_SHUFFLE(2,2,2,2)));
|
||
|
|
return _mm_add_ps(_mm_add_ps(shuf1, shuf2), shuf3);
|
||
|
|
}*/
|
||
|
|
|
||
|
|
// PT: hmmm, by construction, isn't the order always the same for all bucket pruners?
|
||
|
|
// => maybe not because the bucket boxes are still around the merged aabbs, not around the bucket
|
||
|
|
// Still we could do something here
|
||
|
|
// Computes a traversal order for the 5 child buckets of 'parent' with respect to 'rayDir'.
// Children are ordered by increasing |center . rayDir|; empty children (counter == 0) use
// PX_MAX_F32 as their key so they come last.
// \return the 5 child indices packed 3 bits each: first | second<<3 | third<<6 | fourth<<9 | fifth<<12.
//         When the node holds fewer than NODE_SORT_MIN_COUNT objects the identity order is returned.
static /*PX_FORCE_INLINE*/ PxU32 sort(const BucketPrunerNode& parent, const PxVec3& rayDir)
{
	PxU32 totalCount = 0;
	for(PxU32 k=0;k<5;k++)
		totalCount += parent.mCounters[k];
	if(totalCount<NODE_SORT_MIN_COUNT)
		return 0|(1<<3)|(2<<6)|(3<<9)|(4<<12);

	float dp[5];

#ifdef VERIFY_SORT
	// Reference implementation: sorts explicit (key, index) pairs. Its result is compared against
	// the packed-key version below.
	PxU32 code;
	{
		for(PxU32 k=0;k<5;k++)
			dp[k] = parent.mCounters[k] ? PxAbs(parent.mBucketBox[k].mCenter.dot(rayDir)) : PX_MAX_F32;

		PxU32 order[5] = { 0, 1, 2, 3, 4 };

		// PT: using integer cmps since we used fabsf above
		const PxU32* absBits = PxUnionCast<PxU32*, PxF32*>(dp);
		PxU32 key[5];
		for(PxU32 k=0;k<5;k++)
			key[k] = absBits[k];

		// Bubble sort: 4 passes over the 4 adjacent pairs
		for(PxU32 pass=0;pass<5-1;pass++)
		{
			for(PxU32 k=0;k<4;k++)
			{
				if(key[k+1]<key[k])
				{
					tswap(key[k], key[k+1]);
					tswap(order[k], order[k+1]);
				}
			}
		}
		code = order[0]|(order[1]<<3)|(order[2]<<6)|(order[3]<<9)|(order[4]<<12);
	}
#endif

	for(PxU32 k=0;k<5;k++)
		dp[k] = parent.mCounters[k] ? parent.mBucketBox[k].mCenter.dot(rayDir) : PX_MAX_F32;

	const PxU32* values = PxUnionCast<PxU32*, PxF32*>(dp);

	// The mask clears the sign bit (so the integer compare works on the absolute value) and the
	// 3 lowest bits, which are then reused to carry the child index along with its key.
	const PxU32 mask = 0x7ffffff8;
	PxU32 value0 = (values[0]&mask);
	PxU32 value1 = (values[1]&mask)|1;
	PxU32 value2 = (values[2]&mask)|2;
	PxU32 value3 = (values[3]&mask)|3;
	PxU32 value4 = (values[4]&mask)|4;

	// One bubble-sort pass over the 5 packed keys; 4 passes sort them completely.
#define SORT_BLOCK								\
	if(value1<value0)	tswap(value0, value1);	\
	if(value2<value1)	tswap(value1, value2);	\
	if(value3<value2)	tswap(value2, value3);	\
	if(value4<value3)	tswap(value3, value4);
	SORT_BLOCK
	SORT_BLOCK
	SORT_BLOCK
	SORT_BLOCK

	// Extract the child indices back from the low bits of the sorted keys
	const PxU32 first	= value0&7;
	const PxU32 second	= value1&7;
	const PxU32 third	= value2&7;
	const PxU32 fourth	= value3&7;
	const PxU32 fifth	= value4&7;
	const PxU32 code2 = first|(second<<3)|(third<<6)|(fourth<<9)|(fifth<<12);
#ifdef VERIFY_SORT
	PX_ASSERT(code2==code);
#endif
	return code2;
}
|
||
|
|
|
||
|
|
// Fills node.mOrder[0..7] with the packed child order computed by sort() for each of the
// 8 directions in 'dirs'. Each order is narrowed to 16 bits with PxTo16.
static void gPrecomputeSort(BucketPrunerNode& node, const PxVec3* PX_RESTRICT dirs)
{
	for(PxU32 dirIndex=0;dirIndex<8;dirIndex++)
	{
		const PxU32 packedOrder = sort(node, dirs[dirIndex]);
		node.mOrder[dirIndex] = PxTo16(packedOrder);
	}
}
|
||
|
|
#endif
|
||
|
|
|
||
|
|
void BucketPrunerCore::classifyBoxes()
|
||
|
|
{
|
||
|
|
if(!mDirty)
|
||
|
|
return;
|
||
|
|
|
||
|
|
mDirty = false;
|
||
|
|
|
||
|
|
const PxU32 nb = mCoreNbObjects;
|
||
|
|
if(!nb)
|
||
|
|
{
|
||
|
|
mSortedNb=0;
|
||
|
|
return;
|
||
|
|
}
|
||
|
|
|
||
|
|
PX_ASSERT(!mNbFree);
|
||
|
|
|
||
|
|
#ifdef BRUTE_FORCE_LIMIT
|
||
|
|
if(nb<=BRUTE_FORCE_LIMIT)
|
||
|
|
{
|
||
|
|
allocateSortedMemory(nb);
|
||
|
|
BucketBox* sortedBoxes = mSortedWorldBoxes;
|
||
|
|
PrunerPayload* sortedObjects = mSortedObjects;
|
||
|
|
|
||
|
|
const float Half = 0.5f;
|
||
|
|
const __m128 HalfV = _mm_load1_ps(&Half);
|
||
|
|
PX_ALIGN(16, PxVec4) bucketCenter;
|
||
|
|
PX_ALIGN(16, PxVec4) bucketExtents;
|
||
|
|
for(PxU32 i=0;i<nb;i++)
|
||
|
|
{
|
||
|
|
const __m128 bucketBoxMinV = _mm_loadu_ps(&mCoreBoxes[i].minimum.x);
|
||
|
|
const __m128 bucketBoxMaxV = _mm_loadu_ps(&mCoreBoxes[i].maximum.x);
|
||
|
|
const __m128 bucketBoxCenterV = _mm_mul_ps(_mm_add_ps(bucketBoxMaxV, bucketBoxMinV), HalfV);
|
||
|
|
const __m128 bucketBoxExtentsV = _mm_mul_ps(_mm_sub_ps(bucketBoxMaxV, bucketBoxMinV), HalfV);
|
||
|
|
_mm_store_ps(&bucketCenter.x, bucketBoxCenterV);
|
||
|
|
_mm_store_ps(&bucketExtents.x, bucketBoxExtentsV);
|
||
|
|
sortedBoxes[i].mCenter = PxVec3(bucketCenter.x, bucketCenter.y, bucketCenter.z);
|
||
|
|
sortedBoxes[i].mExtents = PxVec3(bucketExtents.x, bucketExtents.y, bucketExtents.z);
|
||
|
|
|
||
|
|
sortedObjects[i] = mCoreObjects[i];
|
||
|
|
}
|
||
|
|
return;
|
||
|
|
}
|
||
|
|
#endif
|
||
|
|
|
||
|
|
|
||
|
|
size_t* remap = reinterpret_cast<size_t*>(PX_ALLOC(nb*sizeof(size_t), ""));
|
||
|
|
for(PxU32 i=0;i<nb;i++)
|
||
|
|
{
|
||
|
|
remap[i] = mCoreObjects[i].data[0];
|
||
|
|
mCoreObjects[i].data[0] = i;
|
||
|
|
}
|
||
|
|
|
||
|
|
// printf("Nb objects: %d\n", nb);
|
||
|
|
|
||
|
|
PrunerPayload localTempObjects[LOCAL_SIZE];
|
||
|
|
BucketBox localTempBoxes[LOCAL_SIZE];
|
||
|
|
PxTransform localTempTransforms[LOCAL_SIZE];
|
||
|
|
PrunerPayload* tempObjects;
|
||
|
|
PxTransform* tempTransforms;
|
||
|
|
BucketBox* tempBoxes;
|
||
|
|
if(nb>LOCAL_SIZE)
|
||
|
|
{
|
||
|
|
tempObjects = PX_ALLOCATE(PrunerPayload, nb, "BucketPruner");
|
||
|
|
tempBoxes = PX_ALLOCATE(BucketBox, nb, "BucketPruner");
|
||
|
|
tempTransforms = PX_ALLOCATE(PxTransform, nb, "BucketPruner");
|
||
|
|
}
|
||
|
|
else
|
||
|
|
{
|
||
|
|
tempObjects = localTempObjects;
|
||
|
|
tempBoxes = localTempBoxes;
|
||
|
|
tempTransforms = localTempTransforms;
|
||
|
|
}
|
||
|
|
|
||
|
|
mSortAxis = sortBoxes(nb, mCoreBoxes, mCoreObjects, mCoreTransforms, mGlobalBox, tempBoxes, tempObjects, tempTransforms);
|
||
|
|
|
||
|
|
PX_ASSERT(mSortAxis);
|
||
|
|
|
||
|
|
allocateSortedMemory(nb);
|
||
|
|
BucketBox* sortedBoxes = mSortedWorldBoxes;
|
||
|
|
PrunerPayload* sortedObjects = mSortedObjects;
|
||
|
|
PxTransform* sortedTransforms = mSortedTransforms;
|
||
|
|
|
||
|
|
const PxU32 yz = PxU32(mSortAxis == 1 ? 2 : 1);
|
||
|
|
const float limitX = mGlobalBox.mCenter.x;
|
||
|
|
const float limitYZ = mGlobalBox.mCenter[yz];
|
||
|
|
mLevel1.classifyBoxes(limitX, limitYZ, nb, tempBoxes, tempObjects, tempTransforms, sortedBoxes, sortedObjects, sortedTransforms, false, mSortAxis);
|
||
|
|
|
||
|
|
processChildBuckets(nb, tempBoxes, tempObjects, tempTransforms, mLevel1, mLevel2, mSortedWorldBoxes, mSortedObjects, mSortedTransforms, mSortAxis);
|
||
|
|
|
||
|
|
for(PxU32 j=0;j<5;j++)
|
||
|
|
processChildBuckets(nb, tempBoxes, tempObjects, tempTransforms, mLevel2[j], mLevel3[j], mSortedWorldBoxes + mLevel1.mOffsets[j], mSortedObjects + mLevel1.mOffsets[j], mSortedTransforms + mLevel1.mOffsets[j], mSortAxis);
|
||
|
|
|
||
|
|
{
|
||
|
|
for(PxU32 i=0;i<nb;i++)
|
||
|
|
{
|
||
|
|
encodeBoxMinMax(mSortedWorldBoxes[i], mSortAxis);
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
if(nb>LOCAL_SIZE)
|
||
|
|
{
|
||
|
|
PX_FREE(tempTransforms);
|
||
|
|
PX_FREE(tempBoxes);
|
||
|
|
PX_FREE(tempObjects);
|
||
|
|
}
|
||
|
|
|
||
|
|
for(PxU32 i=0;i<nb;i++)
|
||
|
|
{
|
||
|
|
const PxU32 coreIndex = PxU32(mSortedObjects[i].data[0]);
|
||
|
|
const size_t saved = remap[coreIndex];
|
||
|
|
mSortedObjects[i].data[0] = saved;
|
||
|
|
mCoreObjects[coreIndex].data[0] = saved;
|
||
|
|
if(mCoreRemap)
|
||
|
|
mCoreRemap[coreIndex] = i;
|
||
|
|
// remap[i] = mCoreObjects[i].data[0];
|
||
|
|
// mCoreObjects[i].data[0] = i;
|
||
|
|
}
|
||
|
|
PX_FREE(remap);
|
||
|
|
|
||
|
|
/* if(mOwnMemory)
|
||
|
|
{
|
||
|
|
PX_FREE(mCoreBoxes);
|
||
|
|
PX_FREE(mCoreObjects);
|
||
|
|
}*/
|
||
|
|
|
||
|
|
|
||
|
|
#ifdef NODE_SORT
|
||
|
|
{
|
||
|
|
PxVec3 dirs[8];
|
||
|
|
dirs[0] = PxVec3(1.0f, 1.0f, 1.0f);
|
||
|
|
dirs[1] = PxVec3(1.0f, 1.0f, -1.0f);
|
||
|
|
dirs[2] = PxVec3(1.0f, -1.0f, 1.0f);
|
||
|
|
dirs[3] = PxVec3(1.0f, -1.0f, -1.0f);
|
||
|
|
dirs[4] = PxVec3(-1.0f, 1.0f, 1.0f);
|
||
|
|
dirs[5] = PxVec3(-1.0f, 1.0f, -1.0f);
|
||
|
|
dirs[6] = PxVec3(-1.0f, -1.0f, 1.0f);
|
||
|
|
dirs[7] = PxVec3(-1.0f, -1.0f, -1.0f);
|
||
|
|
for(int i=0;i<8;i++)
|
||
|
|
dirs[i].normalize();
|
||
|
|
|
||
|
|
gPrecomputeSort(mLevel1, dirs);
|
||
|
|
|
||
|
|
for(PxU32 i=0;i<5;i++)
|
||
|
|
gPrecomputeSort(mLevel2[i], dirs);
|
||
|
|
|
||
|
|
for(PxU32 j=0;j<5;j++)
|
||
|
|
{
|
||
|
|
for(PxU32 i=0;i<5;i++)
|
||
|
|
gPrecomputeSort(mLevel3[j][i], dirs);
|
||
|
|
}
|
||
|
|
}
|
||
|
|
#endif
|
||
|
|
}
|
||
|
|
|
||
|
|
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||
|
|
|
||
|
|
#ifdef CAN_USE_MOVEMASK
|
||
|
|
namespace
|
||
|
|
{
|
||
|
|
struct RayParams
|
||
|
|
{
|
||
|
|
PX_ALIGN(16, PxVec3 mData2); float padding0;
|
||
|
|
PX_ALIGN(16, PxVec3 mFDir); float padding1;
|
||
|
|
PX_ALIGN(16, PxVec3 mData); float padding2;
|
||
|
|
PX_ALIGN(16, PxVec3 mInflate); float padding3;
|
||
|
|
};
|
||
|
|
}
|
||
|
|
// Fills the ray-dependent members of 'rayParams' for the segment [rayOrig, rayOrig + rayDir*maxDist]:
//	mData	= half of the segment vector
//	mData2	= rayOrig + mData
//	mFDir	= per-component absolute value of mData
// mInflate is left untouched.
static PX_FORCE_INLINE void precomputeRayData(RayParams* PX_RESTRICT rayParams, const PxVec3& rayOrig, const PxVec3& rayDir, float maxDist)
{
#ifdef USE_SIMD
	const float halfLength = 0.5f * maxDist;
	const __m128 halfLengthV = _mm_load1_ps(&halfLength);
	const __m128 halfSegmentV = _mm_mul_ps(_mm_loadu_ps(&rayDir.x), halfLengthV);
	const __m128 midPointV = _mm_add_ps(_mm_loadu_ps(&rayOrig.x), halfSegmentV);

	// Clearing the sign bit yields the absolute value
	const PxU32 signClearMask = 0x7fffffff;
	const __m128 absHalfSegmentV = _mm_and_ps(_mm_load1_ps(reinterpret_cast<const float*>(&signClearMask)), halfSegmentV);

	_mm_store_ps(&rayParams->mData.x, halfSegmentV);
	_mm_store_ps(&rayParams->mData2.x, midPointV);
	_mm_store_ps(&rayParams->mFDir.x, absHalfSegmentV);
#else
	const PxVec3 halfSegment = 0.5f * rayDir * maxDist;
	rayParams->mData = halfSegment;
	rayParams->mData2 = rayOrig + halfSegment;
	rayParams->mFDir.x = PxAbs(halfSegment.x);
	rayParams->mFDir.y = PxAbs(halfSegment.y);
	rayParams->mFDir.z = PxAbs(halfSegment.z);
#endif
}
|
||
|
|
|
||
|
|
template <int inflateT>
|
||
|
|
static PX_FORCE_INLINE PxIntBool segmentAABB(const BucketBox& box, const RayParams* PX_RESTRICT params)
|
||
|
|
{
|
||
|
|
#ifdef USE_SIMD
|
||
|
|
const PxU32 maskI = 0x7fffffff;
|
||
|
|
const __m128 fdirV = _mm_load_ps(¶ms->mFDir.x);
|
||
|
|
// #ifdef _DEBUG
|
||
|
|
const __m128 extentsV = inflateT ? _mm_add_ps(_mm_loadu_ps(&box.mExtents.x), _mm_load_ps(¶ms->mInflate.x)) : _mm_loadu_ps(&box.mExtents.x);
|
||
|
|
const __m128 DV = _mm_sub_ps(_mm_load_ps(¶ms->mData2.x), _mm_loadu_ps(&box.mCenter.x));
|
||
|
|
/* #else
|
||
|
|
const __m128 extentsV = inflateT ? _mm_add_ps(_mm_load_ps(&box.mExtents.x), _mm_load_ps(¶ms->mInflate.x)) : _mm_load_ps(&box.mExtents.x);
|
||
|
|
const __m128 DV = _mm_sub_ps(_mm_load_ps(¶ms->mData2.x), _mm_load_ps(&box.mCenter.x));
|
||
|
|
#endif*/
|
||
|
|
__m128 absDV = _mm_and_ps(DV, _mm_load1_ps(reinterpret_cast<const float*>(&maskI)));
|
||
|
|
absDV = _mm_cmpgt_ps(absDV, _mm_add_ps(extentsV, fdirV));
|
||
|
|
const PxU32 test = PxU32(_mm_movemask_ps(absDV));
|
||
|
|
if(test&7)
|
||
|
|
return 0;
|
||
|
|
|
||
|
|
const __m128 dataZYX_V = _mm_load_ps(¶ms->mData.x);
|
||
|
|
const __m128 dataXZY_V = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(dataZYX_V), _MM_SHUFFLE(3,0,2,1)));
|
||
|
|
const __m128 DXZY_V = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(DV), _MM_SHUFFLE(3,0,2,1)));
|
||
|
|
const __m128 fV = _mm_sub_ps(_mm_mul_ps(dataZYX_V, DXZY_V), _mm_mul_ps(dataXZY_V, DV));
|
||
|
|
|
||
|
|
const __m128 fdirZYX_V = _mm_load_ps(¶ms->mFDir.x);
|
||
|
|
const __m128 fdirXZY_V = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(fdirZYX_V), _MM_SHUFFLE(3,0,2,1)));
|
||
|
|
const __m128 extentsXZY_V = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(extentsV), _MM_SHUFFLE(3,0,2,1)));
|
||
|
|
const __m128 fg = _mm_add_ps(_mm_mul_ps(extentsV, fdirXZY_V), _mm_mul_ps(extentsXZY_V, fdirZYX_V));
|
||
|
|
|
||
|
|
__m128 absfV = _mm_and_ps(fV, _mm_load1_ps(reinterpret_cast<const float*>(&maskI)));
|
||
|
|
absfV = _mm_cmpgt_ps(absfV, fg);
|
||
|
|
const PxU32 test2 = PxU32(_mm_movemask_ps(absfV));
|
||
|
|
if(test2&7)
|
||
|
|
return 0;
|
||
|
|
return 1;
|
||
|
|
#else
|
||
|
|
const float boxExtentsx = inflateT ? box.mExtents.x + params->mInflate.x : box.mExtents.x;
|
||
|
|
const float Dx = params->mData2.x - box.mCenter.x; if(fabsf(Dx) > boxExtentsx + params->mFDir.x) return PxIntFalse;
|
||
|
|
|
||
|
|
const float boxExtentsz = inflateT ? box.mExtents.z + params->mInflate.z : box.mExtents.z;
|
||
|
|
const float Dz = params->mData2.z - box.mCenter.z; if(fabsf(Dz) > boxExtentsz + params->mFDir.z) return PxIntFalse;
|
||
|
|
|
||
|
|
const float boxExtentsy = inflateT ? box.mExtents.y + params->mInflate.y : box.mExtents.y;
|
||
|
|
const float Dy = params->mData2.y - box.mCenter.y; if(fabsf(Dy) > boxExtentsy + params->mFDir.y) return PxIntFalse;
|
||
|
|
|
||
|
|
float f;
|
||
|
|
f = params->mData.y * Dz - params->mData.z * Dy; if(fabsf(f) > boxExtentsy*params->mFDir.z + boxExtentsz*params->mFDir.y) return PxIntFalse;
|
||
|
|
f = params->mData.z * Dx - params->mData.x * Dz; if(fabsf(f) > boxExtentsx*params->mFDir.z + boxExtentsz*params->mFDir.x) return PxIntFalse;
|
||
|
|
f = params->mData.x * Dy - params->mData.y * Dx; if(fabsf(f) > boxExtentsx*params->mFDir.y + boxExtentsy*params->mFDir.x) return PxIntFalse;
|
||
|
|
return PxIntTrue;
|
||
|
|
#endif
|
||
|
|
}
|
||
|
|
#else
|
||
|
|
#include "GuBVHTestsSIMD.h"
|
||
|
|
|
||
|
|
typedef RayAABBTest BPRayAABBTest;
|
||
|
|
|
||
|
|
template <int inflateT>
|
||
|
|
static PX_FORCE_INLINE PxIntBool segmentAABB(const BucketBox& box, const BPRayAABBTest& test)
|
||
|
|
{
|
||
|
|
return static_cast<PxIntBool>(test.check<inflateT>(V3LoadU(box.mCenter), V3LoadU(box.mExtents)));
|
||
|
|
}
|
||
|
|
|
||
|
|
/*static PX_FORCE_INLINE IntBool segmentAABB(const BucketBox& box, const BPRayAABBTest& test, PxU32 rayMinLimitX, PxU32 rayMaxLimitX)
|
||
|
|
{
|
||
|
|
if(rayMinLimitX>box.mData1)
|
||
|
|
return 0;
|
||
|
|
if(rayMaxLimitX<box.mData0)
|
||
|
|
return 0;
|
||
|
|
|
||
|
|
return test(Vec3V_From_PxVec3(box.mCenter), Vec3V_From_PxVec3(box.mExtents));
|
||
|
|
}*/
|
||
|
|
#endif
|
||
|
|
|
||
|
|
namespace
|
||
|
|
{
|
||
|
|
struct BucketPrunerRaycastAdapter
|
||
|
|
{
|
||
|
|
PX_FORCE_INLINE BucketPrunerRaycastAdapter(PrunerRaycastCallback& pcb, const PrunerPayload* payloads, const PxTransform* transforms) :
|
||
|
|
mCallback(pcb), mPayloads(payloads), mTransforms(transforms) {}
|
||
|
|
|
||
|
|
PX_FORCE_INLINE bool invoke(PxReal& distance, PxU32 primIndex)
|
||
|
|
{
|
||
|
|
return mCallback.invoke(distance, primIndex, mPayloads, mTransforms);
|
||
|
|
}
|
||
|
|
|
||
|
|
PrunerRaycastCallback& mCallback;
|
||
|
|
const PrunerPayload* mPayloads;
|
||
|
|
const PxTransform* mTransforms;
|
||
|
|
PX_NOCOPY(BucketPrunerRaycastAdapter)
|
||
|
|
};
|
||
|
|
|
||
|
|
struct BucketPrunerOverlapAdapter
|
||
|
|
{
|
||
|
|
PX_FORCE_INLINE BucketPrunerOverlapAdapter(PrunerOverlapCallback& pcb, const PrunerPayload* payloads, const PxTransform* transforms) :
|
||
|
|
mCallback(pcb), mPayloads(payloads), mTransforms(transforms) {}
|
||
|
|
|
||
|
|
PX_FORCE_INLINE bool invoke(PxU32 primIndex)
|
||
|
|
{
|
||
|
|
return mCallback.invoke(primIndex, mPayloads, mTransforms);
|
||
|
|
}
|
||
|
|
|
||
|
|
PrunerOverlapCallback& mCallback;
|
||
|
|
const PrunerPayload* mPayloads;
|
||
|
|
const PxTransform* mTransforms;
|
||
|
|
PX_NOCOPY(BucketPrunerOverlapAdapter)
|
||
|
|
};
|
||
|
|
|
||
|
|
}
|
||
|
|
|
||
|
|
template <int inflateT>
|
||
|
|
static bool processBucket(
|
||
|
|
PxU32 nb, const BucketBox* PX_RESTRICT baseBoxes, const PrunerPayload* PX_RESTRICT baseObjects,
|
||
|
|
const PxTransform* PX_RESTRICT baseTransforms, PxU32 offset, PxU32 totalAllocated,
|
||
|
|
const PxVec3& rayOrig, const PxVec3& rayDir, float& maxDist,
|
||
|
|
#ifdef CAN_USE_MOVEMASK
|
||
|
|
RayParams* PX_RESTRICT rayParams,
|
||
|
|
#else
|
||
|
|
BPRayAABBTest& test, const PxVec3& inflate,
|
||
|
|
#endif
|
||
|
|
PrunerRaycastCallback& pcbArgName, PxU32& _rayMinLimitInt, PxU32& _rayMaxLimitInt, PxU32 sortAxis)
|
||
|
|
{
|
||
|
|
PX_UNUSED(totalAllocated);
|
||
|
|
|
||
|
|
const BucketBox* PX_RESTRICT _boxes = baseBoxes + offset;
|
||
|
|
BucketPrunerRaycastAdapter pcb(pcbArgName, baseObjects + offset, baseTransforms + offset);
|
||
|
|
|
||
|
|
PxU32 rayMinLimitInt = _rayMinLimitInt;
|
||
|
|
PxU32 rayMaxLimitInt = _rayMaxLimitInt;
|
||
|
|
|
||
|
|
const BucketBox* last = _boxes + nb;
|
||
|
|
|
||
|
|
PxU32 objectID = 0;
|
||
|
|
|
||
|
|
while(_boxes!=last)
|
||
|
|
{
|
||
|
|
const BucketBox& currentBox = *_boxes++;
|
||
|
|
const PxU32 currentID = objectID++;
|
||
|
|
|
||
|
|
if(currentBox.mData1<rayMinLimitInt)
|
||
|
|
continue;
|
||
|
|
|
||
|
|
if(currentBox.mData0>rayMaxLimitInt)
|
||
|
|
goto Exit;
|
||
|
|
|
||
|
|
#ifdef CAN_USE_MOVEMASK
|
||
|
|
if(!segmentAABB<inflateT>(currentBox, rayParams))
|
||
|
|
continue;
|
||
|
|
#else
|
||
|
|
if(!segmentAABB<inflateT>(currentBox, test))
|
||
|
|
continue;
|
||
|
|
#endif
|
||
|
|
|
||
|
|
const float MaxDist = maxDist;
|
||
|
|
const bool again = pcb.invoke(maxDist, currentID);
|
||
|
|
if(!again)
|
||
|
|
return false;
|
||
|
|
if(maxDist < MaxDist)
|
||
|
|
{
|
||
|
|
float rayMinLimit, rayMaxLimit;
|
||
|
|
#ifdef CAN_USE_MOVEMASK
|
||
|
|
if(inflateT)
|
||
|
|
computeRayLimits(rayMinLimit, rayMaxLimit, rayOrig, rayDir, maxDist, rayParams->mInflate, sortAxis);
|
||
|
|
else
|
||
|
|
computeRayLimits(rayMinLimit, rayMaxLimit, rayOrig, rayDir, maxDist, sortAxis);
|
||
|
|
|
||
|
|
precomputeRayData(rayParams, rayOrig, rayDir, maxDist);
|
||
|
|
#else
|
||
|
|
if(inflateT)
|
||
|
|
computeRayLimits(rayMinLimit, rayMaxLimit, rayOrig, rayDir, maxDist, inflate, sortAxis);
|
||
|
|
else
|
||
|
|
computeRayLimits(rayMinLimit, rayMaxLimit, rayOrig, rayDir, maxDist, sortAxis);
|
||
|
|
|
||
|
|
test.setDistance(maxDist);
|
||
|
|
#endif
|
||
|
|
const PxU32* binaryMinLimit = reinterpret_cast<const PxU32*>(&rayMinLimit);
|
||
|
|
const PxU32* binaryMaxLimit = reinterpret_cast<const PxU32*>(&rayMaxLimit);
|
||
|
|
rayMinLimitInt = encodeFloat(binaryMinLimit[0]);
|
||
|
|
rayMaxLimitInt = encodeFloat(binaryMaxLimit[0]);
|
||
|
|
}
|
||
|
|
}
|
||
|
|
Exit:
|
||
|
|
|
||
|
|
_rayMinLimitInt = rayMinLimitInt;
|
||
|
|
_rayMaxLimitInt = rayMaxLimitInt;
|
||
|
|
return true;
|
||
|
|
}
|
||
|
|
|
||
|
|
#ifdef NODE_SORT
|
||
|
|
// Builds a 3-bit mask from the sign bits of 'dir': bit 2 = sign of x, bit 1 = sign of y,
// bit 0 = sign of z. The components are read through their 32-bit binary representation.
static PxU32 computeDirMask(const PxVec3& dir)
{
	const PxU32* bits = reinterpret_cast<const PxU32*>(&dir.x);
	const PxU32 signX = bits[0]>>31;
	const PxU32 signY = bits[1]>>31;
	const PxU32 signZ = bits[2]>>31;
	return (signX<<2)|(signY<<1)|signZ;
}
|
||
|
|
#endif
|
||
|
|
|
||
|
|
template <int inflateT>
|
||
|
|
static bool stab(const BucketPrunerCore& core, PrunerRaycastCallback& pcbArgName, const PxVec3& rayOrig, const PxVec3& rayDir, float& maxDist, const PxVec3 inflate)
|
||
|
|
{
|
||
|
|
const PxU32 nb = core.mSortedNb;
|
||
|
|
if(!nb
|
||
|
|
#ifdef FREE_PRUNER_SIZE
|
||
|
|
&& !core.mNbFree
|
||
|
|
#endif
|
||
|
|
)
|
||
|
|
return true;
|
||
|
|
|
||
|
|
if(maxDist==PX_MAX_F32)
|
||
|
|
{
|
||
|
|
/*const*/ PxVec3 boxMin = core.mGlobalBox.getMin() - inflate;
|
||
|
|
/*const*/ PxVec3 boxMax = core.mGlobalBox.getMax() + inflate;
|
||
|
|
|
||
|
|
#ifdef FREE_PRUNER_SIZE
|
||
|
|
if(core.mNbFree)
|
||
|
|
{
|
||
|
|
// TODO: optimize this
|
||
|
|
PxBounds3 freeGlobalBounds;
|
||
|
|
freeGlobalBounds.setEmpty();
|
||
|
|
for(PxU32 i=0;i<core.mNbFree;i++)
|
||
|
|
freeGlobalBounds.include(core.mFreeBounds[i]);
|
||
|
|
freeGlobalBounds.minimum -= inflate;
|
||
|
|
freeGlobalBounds.maximum += inflate;
|
||
|
|
boxMin = boxMin.minimum(freeGlobalBounds.minimum);
|
||
|
|
boxMax = boxMax.maximum(freeGlobalBounds.maximum);
|
||
|
|
}
|
||
|
|
#endif
|
||
|
|
clipRay(rayOrig, rayDir, maxDist, boxMin, boxMax);
|
||
|
|
}
|
||
|
|
|
||
|
|
#ifdef CAN_USE_MOVEMASK
|
||
|
|
RayParams rayParams;
|
||
|
|
#ifdef USE_SIMD
|
||
|
|
rayParams.padding0 = rayParams.padding1 = rayParams.padding2 = rayParams.padding3 = 0.0f;
|
||
|
|
#endif
|
||
|
|
if(inflateT)
|
||
|
|
rayParams.mInflate = inflate;
|
||
|
|
|
||
|
|
precomputeRayData(&rayParams, rayOrig, rayDir, maxDist);
|
||
|
|
#else
|
||
|
|
BPRayAABBTest test(rayOrig, rayDir, maxDist, inflateT ? inflate : PxVec3(0.0f));
|
||
|
|
#endif
|
||
|
|
|
||
|
|
#ifdef FREE_PRUNER_SIZE
|
||
|
|
BucketPrunerRaycastAdapter pcb(pcbArgName, core.mFreeObjects, core.mFreeTransforms);
|
||
|
|
|
||
|
|
for(PxU32 i=0;i<core.mNbFree;i++)
|
||
|
|
{
|
||
|
|
BucketBox tmp;
|
||
|
|
tmp.mCenter = core.mFreeBounds[i].getCenter();
|
||
|
|
tmp.mExtents = core.mFreeBounds[i].getExtents();
|
||
|
|
|
||
|
|
#ifdef CAN_USE_MOVEMASK
|
||
|
|
if(segmentAABB<inflateT>(tmp, &rayParams))
|
||
|
|
#else
|
||
|
|
if(segmentAABB<inflateT>(tmp, test))
|
||
|
|
#endif
|
||
|
|
{
|
||
|
|
if(!pcb.invoke(maxDist, i))
|
||
|
|
return false;
|
||
|
|
}
|
||
|
|
}
|
||
|
|
#endif
|
||
|
|
|
||
|
|
if(!nb)
|
||
|
|
return true;
|
||
|
|
|
||
|
|
#ifdef CAN_USE_MOVEMASK
|
||
|
|
if(!segmentAABB<inflateT>(core.mGlobalBox, &rayParams))
|
||
|
|
return true;
|
||
|
|
#else
|
||
|
|
if(!segmentAABB<inflateT>(core.mGlobalBox, test))
|
||
|
|
return true;
|
||
|
|
#endif
|
||
|
|
|
||
|
|
const PxU32 sortAxis = core.mSortAxis;
|
||
|
|
float rayMinLimit, rayMaxLimit;
|
||
|
|
if(inflateT)
|
||
|
|
computeRayLimits(rayMinLimit, rayMaxLimit, rayOrig, rayDir, maxDist, inflate, sortAxis);
|
||
|
|
else
|
||
|
|
computeRayLimits(rayMinLimit, rayMaxLimit, rayOrig, rayDir, maxDist, sortAxis);
|
||
|
|
|
||
|
|
const PxU32* binaryMinLimit = reinterpret_cast<const PxU32*>(&rayMinLimit);
|
||
|
|
const PxU32* binaryMaxLimit = reinterpret_cast<const PxU32*>(&rayMaxLimit);
|
||
|
|
PxU32 rayMinLimitInt = encodeFloat(binaryMinLimit[0]);
|
||
|
|
PxU32 rayMaxLimitInt = encodeFloat(binaryMaxLimit[0]);
|
||
|
|
/*
|
||
|
|
float rayMinLimitX, rayMaxLimitX;
|
||
|
|
if(inflateT)
|
||
|
|
computeRayLimits(rayMinLimitX, rayMaxLimitX, rayOrig, rayDir, maxDist, inflate, 0);
|
||
|
|
else
|
||
|
|
computeRayLimits(rayMinLimitX, rayMaxLimitX, rayOrig, rayDir, maxDist, 0);
|
||
|
|
|
||
|
|
PxU32 rayMinLimitIntX = encodeFloat(PX_IR(rayMinLimitX));
|
||
|
|
PxU32 rayMaxLimitIntX = encodeFloat(PX_IR(rayMaxLimitX));
|
||
|
|
*/
|
||
|
|
|
||
|
|
float currentDist = maxDist;
|
||
|
|
|
||
|
|
#ifdef NODE_SORT
|
||
|
|
const PxU32 dirIndex = computeDirMask(rayDir);
|
||
|
|
PxU32 orderi = core.mLevel1.mOrder[dirIndex];
|
||
|
|
// PxU32 orderi = sort(core.mLevel1, rayDir);
|
||
|
|
|
||
|
|
for(PxU32 i_=0;i_<5;i_++)
|
||
|
|
{
|
||
|
|
const PxU32 i = orderi&7; orderi>>=3;
|
||
|
|
#else
|
||
|
|
for(PxU32 i=0;i<5;i++)
|
||
|
|
{
|
||
|
|
#endif
|
||
|
|
|
||
|
|
#ifdef CAN_USE_MOVEMASK
|
||
|
|
if(core.mLevel1.mCounters[i] && segmentAABB<inflateT>(core.mLevel1.mBucketBox[i], &rayParams))
|
||
|
|
#else
|
||
|
|
if(core.mLevel1.mCounters[i] && segmentAABB<inflateT>(core.mLevel1.mBucketBox[i], test))
|
||
|
|
// if(core.mLevel1.mCounters[i] && segmentAABB<inflateT>(core.mLevel1.mBucketBox[i], test, rayMinLimitIntX, rayMaxLimitIntX))
|
||
|
|
#endif
|
||
|
|
{
|
||
|
|
|
||
|
|
#ifdef NODE_SORT
|
||
|
|
PxU32 orderj = core.mLevel2[i].mOrder[dirIndex];
|
||
|
|
// PxU32 orderj = sort(core.mLevel2[i], rayDir);
|
||
|
|
|
||
|
|
for(PxU32 j_=0;j_<5;j_++)
|
||
|
|
{
|
||
|
|
const PxU32 j = orderj&7; orderj>>=3;
|
||
|
|
#else
|
||
|
|
for(PxU32 j=0;j<5;j++)
|
||
|
|
{
|
||
|
|
#endif
|
||
|
|
|
||
|
|
#ifdef CAN_USE_MOVEMASK
|
||
|
|
if(core.mLevel2[i].mCounters[j] && segmentAABB<inflateT>(core.mLevel2[i].mBucketBox[j], &rayParams))
|
||
|
|
#else
|
||
|
|
if(core.mLevel2[i].mCounters[j] && segmentAABB<inflateT>(core.mLevel2[i].mBucketBox[j], test))
|
||
|
|
// if(core.mLevel2[i].mCounters[j] && segmentAABB<inflateT>(core.mLevel2[i].mBucketBox[j], test, rayMinLimitIntX, rayMaxLimitIntX))
|
||
|
|
#endif
|
||
|
|
{
|
||
|
|
const BucketPrunerNode& parent = core.mLevel3[i][j];
|
||
|
|
const PxU32 parentOffset = core.mLevel1.mOffsets[i] + core.mLevel2[i].mOffsets[j];
|
||
|
|
|
||
|
|
#ifdef NODE_SORT
|
||
|
|
PxU32 orderk = parent.mOrder[dirIndex];
|
||
|
|
// PxU32 orderk = sort(parent, rayDir);
|
||
|
|
|
||
|
|
for(PxU32 k_=0;k_<5;k_++)
|
||
|
|
{
|
||
|
|
const PxU32 k = orderk&7; orderk>>=3;
|
||
|
|
#else
|
||
|
|
for(PxU32 k=0;k<5;k++)
|
||
|
|
{
|
||
|
|
#endif
|
||
|
|
const PxU32 nbInBucket = parent.mCounters[k];
|
||
|
|
#ifdef CAN_USE_MOVEMASK
|
||
|
|
if(nbInBucket && segmentAABB<inflateT>(parent.mBucketBox[k], &rayParams))
|
||
|
|
#else
|
||
|
|
if(nbInBucket && segmentAABB<inflateT>(parent.mBucketBox[k], test))
|
||
|
|
// if(nbInBucket && segmentAABB<inflateT>(parent.mBucketBox[k], test, rayMinLimitIntX, rayMaxLimitIntX))
|
||
|
|
#endif
|
||
|
|
{
|
||
|
|
const PxU32 offset = parentOffset + parent.mOffsets[k];
|
||
|
|
const bool again = processBucket<inflateT>( nbInBucket, core.mSortedWorldBoxes, core.mSortedObjects,
|
||
|
|
core.mSortedTransforms,
|
||
|
|
offset, core.mSortedNb,
|
||
|
|
rayOrig, rayDir, currentDist,
|
||
|
|
#ifdef CAN_USE_MOVEMASK
|
||
|
|
&rayParams,
|
||
|
|
#else
|
||
|
|
test, inflate,
|
||
|
|
#endif
|
||
|
|
pcbArgName,
|
||
|
|
rayMinLimitInt, rayMaxLimitInt,
|
||
|
|
sortAxis);
|
||
|
|
if(!again)
|
||
|
|
return false;
|
||
|
|
}
|
||
|
|
}
|
||
|
|
}
|
||
|
|
}
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
maxDist = currentDist;
|
||
|
|
return true;
|
||
|
|
}
|
||
|
|
|
||
|
|
bool BucketPrunerCore::raycast(const PxVec3& origin, const PxVec3& unitDir, PxReal& inOutDistance, PrunerRaycastCallback& pcb) const
|
||
|
|
{
|
||
|
|
return ::stab<0>(*this, pcb, origin, unitDir, inOutDistance, PxVec3(0.0f));
|
||
|
|
}
|
||
|
|
|
||
|
|
bool BucketPrunerCore::sweep(const ShapeData& queryVolume, const PxVec3& unitDir, PxReal& inOutDistance, PrunerRaycastCallback& pcb) const
|
||
|
|
{
|
||
|
|
const PxVec3 extents = queryVolume.getPrunerInflatedWorldAABB().getExtents();
|
||
|
|
return ::stab<1>(*this, pcb, queryVolume.getPrunerInflatedWorldAABB().getCenter(), unitDir, inOutDistance, extents);
|
||
|
|
}
|
||
|
|
|
||
|
|
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||
|
|
|
||
|
|
// PT: TODO: decoupling the pruner callback revealed quite a bug here: we call this processBucket function with an inflateT param,
|
||
|
|
// which is re-interpreted as "doAssert" for overlaps! What happened here?
|
||
|
|
template<bool doAssert, typename Test>
|
||
|
|
static PX_FORCE_INLINE bool processBucket( PxU32 nb, const BucketBox* PX_RESTRICT baseBoxes, const PrunerPayload* PX_RESTRICT baseObjects,
|
||
|
|
const PxTransform* PX_RESTRICT baseTransforms,
|
||
|
|
PxU32 offset, PxU32 totalAllocated,
|
||
|
|
const Test& test, PrunerOverlapCallback& pcbArgName,
|
||
|
|
PxU32 minLimitInt, PxU32 maxLimitInt)
|
||
|
|
{
|
||
|
|
PX_UNUSED(totalAllocated);
|
||
|
|
|
||
|
|
const BucketBox* PX_RESTRICT boxes = baseBoxes + offset;
|
||
|
|
|
||
|
|
BucketPrunerOverlapAdapter pcb(pcbArgName, baseObjects + offset, baseTransforms + offset);
|
||
|
|
|
||
|
|
for(PxU32 i=0;i<nb;i++)
|
||
|
|
{
|
||
|
|
const BucketBox& currentBox = *boxes++;
|
||
|
|
|
||
|
|
if(currentBox.mData1<minLimitInt)
|
||
|
|
{
|
||
|
|
if(doAssert)
|
||
|
|
PX_ASSERT(!test(currentBox));
|
||
|
|
continue;
|
||
|
|
}
|
||
|
|
|
||
|
|
if(currentBox.mData0>maxLimitInt)
|
||
|
|
{
|
||
|
|
if(doAssert)
|
||
|
|
PX_ASSERT(!test(currentBox));
|
||
|
|
return true;
|
||
|
|
}
|
||
|
|
|
||
|
|
if(test(currentBox))
|
||
|
|
{
|
||
|
|
if(!pcb.invoke(i))
|
||
|
|
return false;
|
||
|
|
}
|
||
|
|
}
|
||
|
|
return true;
|
||
|
|
}
|
||
|
|
|
||
|
|
template<typename Test, bool isPrecise>
|
||
|
|
class BucketPrunerOverlapTraversal
|
||
|
|
{
|
||
|
|
public:
|
||
|
|
PX_FORCE_INLINE BucketPrunerOverlapTraversal() {}
|
||
|
|
|
||
|
|
/*PX_FORCE_INLINE*/ bool operator()(const BucketPrunerCore& core, const Test& test, PrunerOverlapCallback& pcbArgName, const PxBounds3& cullBox) const
|
||
|
|
{
|
||
|
|
#ifdef FREE_PRUNER_SIZE
|
||
|
|
BucketPrunerOverlapAdapter pcb(pcbArgName, core.mFreeObjects, core.mFreeTransforms);
|
||
|
|
|
||
|
|
for(PxU32 i=0;i<core.mNbFree;i++)
|
||
|
|
{
|
||
|
|
if(test(core.mFreeBounds[i]))
|
||
|
|
{
|
||
|
|
if(!pcb.invoke(i))
|
||
|
|
return false;
|
||
|
|
}
|
||
|
|
}
|
||
|
|
#endif
|
||
|
|
const PxU32 nb = core.mSortedNb;
|
||
|
|
if(!nb)
|
||
|
|
return true;
|
||
|
|
|
||
|
|
#ifdef BRUTE_FORCE_LIMIT
|
||
|
|
if(nb<=BRUTE_FORCE_LIMIT)
|
||
|
|
{
|
||
|
|
for(PxU32 i=0;i<nb;i++)
|
||
|
|
{
|
||
|
|
if(test(core.mSortedWorldBoxes[i]))
|
||
|
|
{
|
||
|
|
PxReal dist = -1.0f; // no distance for overlaps
|
||
|
|
if(!pcb.invoke(dist, core.mSortedObjects[i]))
|
||
|
|
return false;
|
||
|
|
}
|
||
|
|
}
|
||
|
|
return true;
|
||
|
|
}
|
||
|
|
#endif
|
||
|
|
|
||
|
|
if(!test(core.mGlobalBox))
|
||
|
|
return true;
|
||
|
|
|
||
|
|
const PxU32 sortAxis = core.mSortAxis;
|
||
|
|
const float boxMinLimit = cullBox.minimum[sortAxis];
|
||
|
|
const float boxMaxLimit = cullBox.maximum[sortAxis];
|
||
|
|
|
||
|
|
const PxU32* binaryMinLimit = reinterpret_cast<const PxU32*>(&boxMinLimit);
|
||
|
|
const PxU32* binaryMaxLimit = reinterpret_cast<const PxU32*>(&boxMaxLimit);
|
||
|
|
const PxU32 rayMinLimitInt = encodeFloat(binaryMinLimit[0]);
|
||
|
|
const PxU32 rayMaxLimitInt = encodeFloat(binaryMaxLimit[0]);
|
||
|
|
|
||
|
|
for(PxU32 i=0;i<5;i++)
|
||
|
|
{
|
||
|
|
if(core.mLevel1.mCounters[i] && test(core.mLevel1.mBucketBox[i]))
|
||
|
|
{
|
||
|
|
for(PxU32 j=0;j<5;j++)
|
||
|
|
{
|
||
|
|
if(core.mLevel2[i].mCounters[j] && test(core.mLevel2[i].mBucketBox[j]))
|
||
|
|
{
|
||
|
|
for(PxU32 k=0;k<5;k++)
|
||
|
|
{
|
||
|
|
const PxU32 nbInBucket = core.mLevel3[i][j].mCounters[k];
|
||
|
|
if(nbInBucket && test(core.mLevel3[i][j].mBucketBox[k]))
|
||
|
|
{
|
||
|
|
const PxU32 offset = core.mLevel1.mOffsets[i] + core.mLevel2[i].mOffsets[j] + core.mLevel3[i][j].mOffsets[k];
|
||
|
|
if(!processBucket<isPrecise>(nbInBucket, core.mSortedWorldBoxes, core.mSortedObjects,
|
||
|
|
core.mSortedTransforms,
|
||
|
|
offset, core.mSortedNb, test, pcbArgName, rayMinLimitInt, rayMaxLimitInt))
|
||
|
|
return false;
|
||
|
|
}
|
||
|
|
}
|
||
|
|
}
|
||
|
|
}
|
||
|
|
}
|
||
|
|
}
|
||
|
|
return true;
|
||
|
|
}
|
||
|
|
};
|
||
|
|
|
||
|
|
///////////////////////////////////////////////////////////////////////////////
|
||
|
|
|
||
|
|
#ifdef CAN_USE_MOVEMASK
|
||
|
|
// Returns 1 when the sign bits of the first three lanes of 'a' are all set, 0 otherwise.
// The fourth lane is ignored.
PX_FORCE_INLINE PxU32 BAllTrue3_R(const BoolV a)
{
	const PxI32 xyzBits = _mm_movemask_ps(a) & 0x7;
	return xyzBits == 0x7 ? PxU32(1) : PxU32(0);
}
|
||
|
|
#endif
|
||
|
|
|
||
|
|
#ifdef USE_SIMD
|
||
|
|
struct SphereAABBTest_SIMD
|
||
|
|
{
|
||
|
|
PX_FORCE_INLINE SphereAABBTest_SIMD(const Sphere& sphere) :
|
||
|
|
#ifdef CAN_USE_MOVEMASK
|
||
|
|
mCenter (V4LoadU(&sphere.center.x)),
|
||
|
|
#else
|
||
|
|
mCenter (V3LoadU(sphere.center)),
|
||
|
|
#endif
|
||
|
|
mRadius2(FLoad(sphere.radius * sphere.radius))
|
||
|
|
{}
|
||
|
|
|
||
|
|
PX_FORCE_INLINE PxIntBool operator()(const BucketBox& box) const
|
||
|
|
{
|
||
|
|
#ifdef CAN_USE_MOVEMASK
|
||
|
|
const Vec4V boxCenter = AlignedLoad(&box.mCenter.x);
|
||
|
|
const Vec4V boxExtents = AlignedLoad(&box.mExtents.x);
|
||
|
|
//
|
||
|
|
const Vec4V offset = V4Sub(mCenter, boxCenter);
|
||
|
|
const Vec4V closest = V4Clamp(offset, V4Neg(boxExtents), boxExtents);
|
||
|
|
const Vec4V d = V4Sub(offset, closest);
|
||
|
|
|
||
|
|
const FloatV dot = V4Dot3(d,d);
|
||
|
|
return PxIntBool(BAllTrue3_R(FIsGrtrOrEq(mRadius2, dot)));
|
||
|
|
#else
|
||
|
|
const Vec3V boxCenter = V3LoadU(box.mCenter);
|
||
|
|
const Vec3V boxExtents = V3LoadU(box.mExtents);
|
||
|
|
//
|
||
|
|
const Vec3V offset = V3Sub(mCenter, boxCenter);
|
||
|
|
const Vec3V closest = V3Clamp(offset, V3Neg(boxExtents), boxExtents);
|
||
|
|
const Vec3V d = V3Sub(offset, closest);
|
||
|
|
return PxIntBool(BAllEqTTTT(FIsGrtrOrEq(mRadius2, V3Dot(d, d))));
|
||
|
|
#endif
|
||
|
|
}
|
||
|
|
|
||
|
|
PX_FORCE_INLINE PxIntBool operator()(const PxBounds3& bounds) const
|
||
|
|
{
|
||
|
|
BucketBox tmp;
|
||
|
|
tmp.mCenter = bounds.getCenter();
|
||
|
|
tmp.mExtents = bounds.getExtents();
|
||
|
|
return (*this)(tmp);
|
||
|
|
}
|
||
|
|
|
||
|
|
private:
|
||
|
|
SphereAABBTest_SIMD& operator=(const SphereAABBTest_SIMD&);
|
||
|
|
#ifdef CAN_USE_MOVEMASK
|
||
|
|
const Vec4V mCenter;
|
||
|
|
#else
|
||
|
|
const Vec3V mCenter;
|
||
|
|
#endif
|
||
|
|
const FloatV mRadius2;
|
||
|
|
};
|
||
|
|
#else
|
||
|
|
struct SphereAABBTest_Scalar
|
||
|
|
{
|
||
|
|
PX_FORCE_INLINE SphereAABBTest_Scalar(const Sphere& sphere) :
|
||
|
|
mCenter (sphere.center),
|
||
|
|
mRadius2(sphere.radius * sphere.radius)
|
||
|
|
{}
|
||
|
|
|
||
|
|
PX_FORCE_INLINE PxIntBool operator()(const BucketBox& box) const
|
||
|
|
{
|
||
|
|
const PxVec3 minimum = box.getMin();
|
||
|
|
const PxVec3 maximum = box.getMax();
|
||
|
|
|
||
|
|
float d = 0.0f;
|
||
|
|
|
||
|
|
//find the square of the distance
|
||
|
|
//from the sphere to the box
|
||
|
|
for(PxU32 i=0;i<3;i++)
|
||
|
|
{
|
||
|
|
if(mCenter[i]<minimum[i])
|
||
|
|
{
|
||
|
|
const float s = mCenter[i] - minimum[i];
|
||
|
|
d += s*s;
|
||
|
|
}
|
||
|
|
else if(mCenter[i]>maximum[i])
|
||
|
|
{
|
||
|
|
const float s = mCenter[i] - maximum[i];
|
||
|
|
d += s*s;
|
||
|
|
}
|
||
|
|
}
|
||
|
|
return d <= mRadius2;
|
||
|
|
}
|
||
|
|
|
||
|
|
private:
|
||
|
|
SphereAABBTest_Scalar& operator=(const SphereAABBTest_Scalar&);
|
||
|
|
const PxVec3 mCenter;
|
||
|
|
float mRadius2;
|
||
|
|
};
|
||
|
|
#endif
|
||
|
|
|
||
|
|
#ifdef USE_SIMD
|
||
|
|
typedef SphereAABBTest_SIMD BucketPrunerSphereAABBTest;
|
||
|
|
#else
|
||
|
|
typedef SphereAABBTest_Scalar BucketPrunerSphereAABBTest;
|
||
|
|
#endif
|
||
|
|
|
||
|
|
///////////////////////////////////////////////////////////////////////////////
|
||
|
|
|
||
|
|
struct BucketPrunerAABBAABBTest
|
||
|
|
{
|
||
|
|
PX_FORCE_INLINE BucketPrunerAABBAABBTest(const PxBounds3& queryBox) : mBox(queryBox) {}
|
||
|
|
|
||
|
|
PX_FORCE_INLINE PxIntBool operator()(const BucketBox& box) const
|
||
|
|
{
|
||
|
|
// PT: we don't use PxBounds3::intersects() because isValid() asserts on our empty boxes!
|
||
|
|
const PxVec3 bucketMin = box.getMin();
|
||
|
|
const PxVec3 bucketMax = box.getMax();
|
||
|
|
return !(mBox.minimum.x > bucketMax.x || bucketMin.x > mBox.maximum.x ||
|
||
|
|
mBox.minimum.y > bucketMax.y || bucketMin.y > mBox.maximum.y ||
|
||
|
|
mBox.minimum.z > bucketMax.z || bucketMin.z > mBox.maximum.z);
|
||
|
|
}
|
||
|
|
|
||
|
|
PX_FORCE_INLINE PxIntBool operator()(const PxBounds3& bounds) const
|
||
|
|
{
|
||
|
|
// PT: we don't use PxBounds3::intersects() because isValid() asserts on our empty boxes!
|
||
|
|
const PxVec3& bucketMin = bounds.minimum;
|
||
|
|
const PxVec3& bucketMax = bounds.maximum;
|
||
|
|
return !(mBox.minimum.x > bucketMax.x || bucketMin.x > mBox.maximum.x ||
|
||
|
|
mBox.minimum.y > bucketMax.y || bucketMin.y > mBox.maximum.y ||
|
||
|
|
mBox.minimum.z > bucketMax.z || bucketMin.z > mBox.maximum.z);
|
||
|
|
}
|
||
|
|
private:
|
||
|
|
BucketPrunerAABBAABBTest& operator=(const BucketPrunerAABBAABBTest&);
|
||
|
|
const PxBounds3 mBox;
|
||
|
|
};
|
||
|
|
|
||
|
|
/*struct BucketPrunerAABBAABBTest_SIMD
|
||
|
|
{
|
||
|
|
PX_FORCE_INLINE BucketPrunerAABBAABBTest_SIMD(const PxBounds3& b)
|
||
|
|
: mCenter(V3LoadU(b.getCenter()))
|
||
|
|
, mExtents(V3LoadU(b.getExtents()))
|
||
|
|
{}
|
||
|
|
|
||
|
|
PX_FORCE_INLINE PxIntBool operator()(const BucketBox& box) const
|
||
|
|
{
|
||
|
|
return V3AllGrtrOrEq(V3Add(mExtents, AlignedLoad(&box.mExtents.x)), V3Abs(V3Sub(AlignedLoad(&box.mCenter.x), mCenter)));
|
||
|
|
}
|
||
|
|
private:
|
||
|
|
BucketPrunerAABBAABBTest_SIMD& operator=(const BucketPrunerAABBAABBTest_SIMD&);
|
||
|
|
const Vec3V mCenter, mExtents;
|
||
|
|
};*/
|
||
|
|
|
||
|
|
///////////////////////////////////////////////////////////////////////////////
|
||
|
|
|
||
|
|
#ifdef USE_SIMD
|
||
|
|
struct OBBAABBTest_SIMD
|
||
|
|
{
|
||
|
|
OBBAABBTest_SIMD(const PxMat33& rotation, const PxVec3& translation, const PxVec3& extents)
|
||
|
|
{
|
||
|
|
const Vec3V eps = V3Load(1e-6f);
|
||
|
|
|
||
|
|
mT = V3LoadU(translation);
|
||
|
|
mExtents = V3LoadU(extents);
|
||
|
|
|
||
|
|
// storing the transpose matrices yields a simpler SIMD test
|
||
|
|
mRT = Mat33V_From_PxMat33(rotation.getTranspose());
|
||
|
|
mART = Mat33V(V3Add(V3Abs(mRT.col0), eps), V3Add(V3Abs(mRT.col1), eps), V3Add(V3Abs(mRT.col2), eps));
|
||
|
|
mBB_xyz = M33TrnspsMulV3(mART, mExtents);
|
||
|
|
|
||
|
|
/* if(fullTest)
|
||
|
|
{
|
||
|
|
const Vec3V eYZX = V3PermYZX(mExtents), eZXY = V3PermZXY(mExtents);
|
||
|
|
|
||
|
|
mBB_123 = V3MulAdd(eYZX, V3PermZXY(mART.col0), V3Mul(eZXY, V3PermYZX(mART.col0)));
|
||
|
|
mBB_456 = V3MulAdd(eYZX, V3PermZXY(mART.col1), V3Mul(eZXY, V3PermYZX(mART.col1)));
|
||
|
|
mBB_789 = V3MulAdd(eYZX, V3PermZXY(mART.col2), V3Mul(eZXY, V3PermYZX(mART.col2)));
|
||
|
|
}*/
|
||
|
|
}
|
||
|
|
|
||
|
|
PX_FORCE_INLINE PxIntBool operator()(const BucketBox& box) const
|
||
|
|
{
|
||
|
|
const Vec3V extentsV = V3LoadU(box.mExtents);
|
||
|
|
|
||
|
|
const Vec3V t = V3Sub(mT, V3LoadU(box.mCenter));
|
||
|
|
|
||
|
|
// class I - axes of AABB
|
||
|
|
if(V3OutOfBounds(t, V3Add(extentsV, mBB_xyz)))
|
||
|
|
return PxIntFalse;
|
||
|
|
|
||
|
|
const Vec3V rX = mRT.col0, rY = mRT.col1, rZ = mRT.col2;
|
||
|
|
const Vec3V arX = mART.col0, arY = mART.col1, arZ = mART.col2;
|
||
|
|
|
||
|
|
const FloatV eX = V3GetX(extentsV), eY = V3GetY(extentsV), eZ = V3GetZ(extentsV);
|
||
|
|
const FloatV tX = V3GetX(t), tY = V3GetY(t), tZ = V3GetZ(t);
|
||
|
|
|
||
|
|
// class II - axes of OBB
|
||
|
|
{
|
||
|
|
const Vec3V v = V3ScaleAdd(rZ, tZ, V3ScaleAdd(rY, tY, V3Scale(rX, tX)));
|
||
|
|
const Vec3V v2 = V3ScaleAdd(arZ, eZ, V3ScaleAdd(arY, eY, V3ScaleAdd(arX, eX, mExtents)));
|
||
|
|
if(V3OutOfBounds(v, v2))
|
||
|
|
return PxIntFalse;
|
||
|
|
}
|
||
|
|
|
||
|
|
// if(!fullTest)
|
||
|
|
return PxIntTrue;
|
||
|
|
|
||
|
|
/* // class III - edge cross products. Almost all OBB tests early-out with type I or type II,
|
||
|
|
// so early-outs here probably aren't useful (TODO: profile)
|
||
|
|
|
||
|
|
const Vec3V va = V3NegScaleSub(rZ, tY, V3Scale(rY, tZ));
|
||
|
|
const Vec3V va2 = V3ScaleAdd(arY, eZ, V3ScaleAdd(arZ, eY, mBB_123));
|
||
|
|
const BoolV ba = BOr(V3IsGrtr(va, va2), V3IsGrtr(V3Neg(va2), va));
|
||
|
|
|
||
|
|
const Vec3V vb = V3NegScaleSub(rX, tZ, V3Scale(rZ, tX));
|
||
|
|
const Vec3V vb2 = V3ScaleAdd(arX, eZ, V3ScaleAdd(arZ, eX, mBB_456));
|
||
|
|
const BoolV bb = BOr(V3IsGrtr(vb, vb2), V3IsGrtr(V3Neg(vb2), vb));
|
||
|
|
|
||
|
|
const Vec3V vc = V3NegScaleSub(rY, tX, V3Scale(rX, tY));
|
||
|
|
const Vec3V vc2 = V3ScaleAdd(arX, eY, V3ScaleAdd(arY, eX, mBB_789));
|
||
|
|
const BoolV bc = BOr(V3IsGrtr(vc, vc2), V3IsGrtr(V3Neg(vc2), vc));
|
||
|
|
|
||
|
|
return BAllEq(BOr(ba, BOr(bb,bc)), BFFFF());*/
|
||
|
|
}
|
||
|
|
|
||
|
|
PX_FORCE_INLINE PxIntBool operator()(const PxBounds3& bounds) const
|
||
|
|
{
|
||
|
|
BucketBox tmp;
|
||
|
|
tmp.mCenter = bounds.getCenter();
|
||
|
|
tmp.mExtents = bounds.getExtents();
|
||
|
|
return (*this)(tmp);
|
||
|
|
}
|
||
|
|
|
||
|
|
Vec3V mExtents; // extents of OBB
|
||
|
|
Vec3V mT; // translation of OBB
|
||
|
|
Mat33V mRT; // transpose of rotation matrix of OBB
|
||
|
|
Mat33V mART; // transpose of mRT, padded by epsilon
|
||
|
|
Vec3V mBB_xyz; // extents of OBB along coordinate axes
|
||
|
|
|
||
|
|
/* Vec3V mBB_123; // projections of extents onto edge-cross axes
|
||
|
|
Vec3V mBB_456;
|
||
|
|
Vec3V mBB_789;*/
|
||
|
|
};
|
||
|
|
#else
|
||
|
|
struct OBBAABBTest_Scalar
|
||
|
|
{
|
||
|
|
OBBAABBTest_Scalar(const PxMat33& rotation, const PxVec3& translation, const PxVec3& extents)
|
||
|
|
{
|
||
|
|
mR = rotation;
|
||
|
|
mT = translation;
|
||
|
|
mExtents = extents;
|
||
|
|
|
||
|
|
const PxVec3 eps(1e-6f);
|
||
|
|
mAR = PxMat33(mR[0].abs() + eps, mR[1].abs() + eps, mR[2].abs() + eps); // Epsilon prevents floating-point inaccuracies (strategy borrowed from RAPID)
|
||
|
|
mBB_xyz = mAR.transform(mExtents); // Precompute box-box data - Courtesy of Erwin de Vries
|
||
|
|
|
||
|
|
/* PxReal ex = mExtents.x, ey = mExtents.y, ez = mExtents.z;
|
||
|
|
mBB_1 = ey*mAR[2].x + ez*mAR[1].x; mBB_2 = ez*mAR[0].x + ex*mAR[2].x; mBB_3 = ex*mAR[1].x + ey*mAR[0].x;
|
||
|
|
mBB_4 = ey*mAR[2].y + ez*mAR[1].y; mBB_5 = ez*mAR[0].y + ex*mAR[2].y; mBB_6 = ex*mAR[1].y + ey*mAR[0].y;
|
||
|
|
mBB_7 = ey*mAR[2].z + ez*mAR[1].z; mBB_8 = ez*mAR[0].z + ex*mAR[2].z; mBB_9 = ex*mAR[1].z + ey*mAR[0].z;*/
|
||
|
|
}
|
||
|
|
|
||
|
|
PX_FORCE_INLINE PxIntBool operator()(const BucketBox& box) const
|
||
|
|
{
|
||
|
|
const PxVec3& c = box.mCenter;
|
||
|
|
const PxVec3& e = box.mExtents;
|
||
|
|
|
||
|
|
const PxVec3 T = mT - c;
|
||
|
|
// Class I : A's basis vectors
|
||
|
|
if(PxAbs(T.x) > e.x + mBB_xyz.x) return PxIntFalse;
|
||
|
|
if(PxAbs(T.y) > e.y + mBB_xyz.y) return PxIntFalse;
|
||
|
|
if(PxAbs(T.z) > e.z + mBB_xyz.z) return PxIntFalse;
|
||
|
|
|
||
|
|
// Class II : B's basis vectors
|
||
|
|
if(PxAbs(T.dot(mR[0])) > e.dot(mAR[0]) + mExtents.x) return PxIntFalse;
|
||
|
|
if(PxAbs(T.dot(mR[1])) > e.dot(mAR[1]) + mExtents.y) return PxIntFalse;
|
||
|
|
if(PxAbs(T.dot(mR[2])) > e.dot(mAR[2]) + mExtents.z) return PxIntFalse;
|
||
|
|
|
||
|
|
// Class III : 9 cross products
|
||
|
|
if(0)
|
||
|
|
{
|
||
|
|
if(PxAbs(T.z*mR[0].y - T.y*mR[0].z) > e.y*mAR[0].z + e.z*mAR[0].y + mBB_1) return PxIntFalse; // L = A0 x B0
|
||
|
|
if(PxAbs(T.z*mR[1].y - T.y*mR[1].z) > e.y*mAR[1].z + e.z*mAR[1].y + mBB_2) return PxIntFalse; // L = A0 x B1
|
||
|
|
if(PxAbs(T.z*mR[2].y - T.y*mR[2].z) > e.y*mAR[2].z + e.z*mAR[2].y + mBB_3) return PxIntFalse; // L = A0 x B2
|
||
|
|
|
||
|
|
if(PxAbs(T.x*mR[0].z - T.z*mR[0].x) > e.x*mAR[0].z + e.z*mAR[0].x + mBB_4) return PxIntFalse; // L = A1 x B0
|
||
|
|
if(PxAbs(T.x*mR[1].z - T.z*mR[1].x) > e.x*mAR[1].z + e.z*mAR[1].x + mBB_5) return PxIntFalse; // L = A1 x B1
|
||
|
|
if(PxAbs(T.x*mR[2].z - T.z*mR[2].x) > e.x*mAR[2].z + e.z*mAR[2].x + mBB_6) return PxIntFalse; // L = A1 x B2
|
||
|
|
|
||
|
|
if(PxAbs(T.y*mR[0].x - T.x*mR[0].y) > e.x*mAR[0].y + e.y*mAR[0].x + mBB_7) return PxIntFalse; // L = A2 x B0
|
||
|
|
if(PxAbs(T.y*mR[1].x - T.x*mR[1].y) > e.x*mAR[1].y + e.y*mAR[1].x + mBB_8) return PxIntFalse; // L = A2 x B1
|
||
|
|
if(PxAbs(T.y*mR[2].x - T.x*mR[2].y) > e.x*mAR[2].y + e.y*mAR[2].x + mBB_9) return PxIntFalse; // L = A2 x B2
|
||
|
|
}
|
||
|
|
return PxIntTrue;
|
||
|
|
}
|
||
|
|
|
||
|
|
private:
|
||
|
|
PxMat33 mR; // rotation matrix
|
||
|
|
PxMat33 mAR; // absolute rotation matrix
|
||
|
|
PxVec3 mT; // translation from obb space to model space
|
||
|
|
PxVec3 mExtents;
|
||
|
|
|
||
|
|
PxVec3 mBB_xyz;
|
||
|
|
|
||
|
|
float mBB_1, mBB_2, mBB_3;
|
||
|
|
float mBB_4, mBB_5, mBB_6;
|
||
|
|
float mBB_7, mBB_8, mBB_9;
|
||
|
|
};
|
||
|
|
#endif
|
||
|
|
|
||
|
|
#ifdef USE_SIMD
|
||
|
|
typedef OBBAABBTest_SIMD BucketPrunerOBBAABBTest;
|
||
|
|
#else
|
||
|
|
typedef OBBAABBTest_Scalar BucketPrunerOBBAABBTest;
|
||
|
|
#endif
|
||
|
|
|
||
|
|
///////////////////////////////////////////////////////////////////////////////
|
||
|
|
|
||
|
|
bool BucketPrunerCore::overlap(const ShapeData& queryVolume, PrunerOverlapCallback& pcb) const
|
||
|
|
{
|
||
|
|
PX_ASSERT(!mDirty);
|
||
|
|
bool again = true;
|
||
|
|
|
||
|
|
const PxBounds3& cullBox = queryVolume.getPrunerInflatedWorldAABB();
|
||
|
|
|
||
|
|
switch(queryVolume.getType())
|
||
|
|
{
|
||
|
|
case PxGeometryType::eBOX:
|
||
|
|
{
|
||
|
|
if(queryVolume.isOBB())
|
||
|
|
{
|
||
|
|
const BucketPrunerOverlapTraversal<BucketPrunerOBBAABBTest, false> overlap;
|
||
|
|
again = overlap(*this,
|
||
|
|
BucketPrunerOBBAABBTest(
|
||
|
|
queryVolume.getPrunerWorldRot33(), queryVolume.getPrunerWorldPos(),
|
||
|
|
queryVolume.getPrunerBoxGeomExtentsInflated()),
|
||
|
|
pcb, cullBox);
|
||
|
|
}
|
||
|
|
else
|
||
|
|
{
|
||
|
|
const BucketPrunerOverlapTraversal<BucketPrunerAABBAABBTest, true> overlap;
|
||
|
|
again = overlap(*this, BucketPrunerAABBAABBTest(cullBox), pcb, cullBox);
|
||
|
|
}
|
||
|
|
}
|
||
|
|
break;
|
||
|
|
|
||
|
|
case PxGeometryType::eCAPSULE:
|
||
|
|
{
|
||
|
|
const BucketPrunerOverlapTraversal<BucketPrunerOBBAABBTest, false> overlap;
|
||
|
|
again = overlap(*this,
|
||
|
|
BucketPrunerOBBAABBTest(
|
||
|
|
queryVolume.getPrunerWorldRot33(), queryVolume.getPrunerWorldPos(),
|
||
|
|
queryVolume.getPrunerBoxGeomExtentsInflated()),
|
||
|
|
pcb, cullBox);
|
||
|
|
}
|
||
|
|
break;
|
||
|
|
|
||
|
|
case PxGeometryType::eSPHERE:
|
||
|
|
{
|
||
|
|
const Sphere& sphere = queryVolume.getGuSphere();
|
||
|
|
const PxVec3 sphereExtents(sphere.radius);
|
||
|
|
const BucketPrunerOverlapTraversal<BucketPrunerSphereAABBTest, true> overlap;
|
||
|
|
again = overlap(*this, BucketPrunerSphereAABBTest(sphere), pcb, cullBox);
|
||
|
|
}
|
||
|
|
break;
|
||
|
|
|
||
|
|
case PxGeometryType::eCONVEXMESH:
|
||
|
|
{
|
||
|
|
const BucketPrunerOverlapTraversal<BucketPrunerOBBAABBTest, false> overlap;
|
||
|
|
again = overlap(*this,
|
||
|
|
BucketPrunerOBBAABBTest(
|
||
|
|
queryVolume.getPrunerWorldRot33(), queryVolume.getPrunerWorldPos(),
|
||
|
|
queryVolume.getPrunerBoxGeomExtentsInflated()),
|
||
|
|
pcb, cullBox);
|
||
|
|
}
|
||
|
|
break;
|
||
|
|
|
||
|
|
default:
|
||
|
|
PX_ALWAYS_ASSERT_MESSAGE("unsupported overlap query volume geometry type");
|
||
|
|
}
|
||
|
|
return again;
|
||
|
|
}
|
||
|
|
|
||
|
|
///////////////////////////////////////////////////////////////////////////////
|
||
|
|
|
||
|
|
// Writes to 'bounds' the box enclosing mGlobalBox and, in FREE_PRUNER_SIZE builds, all free-object bounds.
void BucketPrunerCore::getGlobalBounds(PxBounds3& bounds) const
{
	// PT: TODO: refactor with similar code above in the file
	const Vec4V globalCenter = V4LoadU(&mGlobalBox.mCenter.x);
	const Vec4V globalExtents = V4LoadU(&mGlobalBox.mExtents.x);

	Vec4V lower = V4Sub(globalCenter, globalExtents);
	Vec4V upper = V4Add(globalCenter, globalExtents);

#ifdef FREE_PRUNER_SIZE
	// Grow the box with every free object's bounds.
	const PxBounds3* freeBounds = mFreeBounds;
	for(PxU32 remaining=mNbFree; remaining!=0; remaining--, freeBounds++)
	{
		lower = V4Min(lower, V4LoadU(&freeBounds->minimum.x));
		upper = V4Max(upper, V4LoadU(&freeBounds->maximum.x));
	}
#endif

	StoreBounds(bounds, lower, upper);
}
|
||
|
|
|
||
|
|
///////////////////////////////////////////////////////////////////////////////
|
||
|
|
|
||
|
|
void BucketPrunerCore::shiftOrigin(const PxVec3& shift)
|
||
|
|
{
|
||
|
|
#ifdef FREE_PRUNER_SIZE
|
||
|
|
for(PxU32 i=0;i<mNbFree;i++)
|
||
|
|
{
|
||
|
|
mFreeBounds[i].minimum -= shift;
|
||
|
|
mFreeBounds[i].maximum -= shift;
|
||
|
|
mFreeTransforms[i].p -= shift;
|
||
|
|
}
|
||
|
|
#endif
|
||
|
|
const PxU32 nb = mCoreNbObjects;
|
||
|
|
//if (nb)
|
||
|
|
{
|
||
|
|
mGlobalBox.mCenter -= shift;
|
||
|
|
|
||
|
|
#if PX_DEBUG
|
||
|
|
mGlobalBox.mDebugMin -= shift[mSortAxis];
|
||
|
|
#endif
|
||
|
|
|
||
|
|
encodeBoxMinMax(mGlobalBox, mSortAxis);
|
||
|
|
|
||
|
|
for(PxU32 i=0; i<nb; i++)
|
||
|
|
{
|
||
|
|
mCoreBoxes[i].minimum -= shift;
|
||
|
|
mCoreBoxes[i].maximum -= shift;
|
||
|
|
mCoreTransforms[i].p -= shift;
|
||
|
|
}
|
||
|
|
|
||
|
|
for(PxU32 i=0; i<mSortedNb; i++)
|
||
|
|
{
|
||
|
|
mSortedWorldBoxes[i].mCenter -= shift;
|
||
|
|
|
||
|
|
#if PX_DEBUG
|
||
|
|
mSortedWorldBoxes[i].mDebugMin -= shift[mSortAxis];
|
||
|
|
#endif
|
||
|
|
encodeBoxMinMax(mSortedWorldBoxes[i], mSortAxis);
|
||
|
|
|
||
|
|
mSortedTransforms[i].p -= shift;
|
||
|
|
}
|
||
|
|
|
||
|
|
for(PxU32 i=0; i < 5; i++)
|
||
|
|
mLevel1.mBucketBox[i].mCenter -= shift;
|
||
|
|
|
||
|
|
for(PxU32 i=0; i < 5; i++)
|
||
|
|
for(PxU32 j=0; j < 5; j++)
|
||
|
|
mLevel2[i].mBucketBox[j].mCenter -= shift;
|
||
|
|
|
||
|
|
for(PxU32 i=0; i < 5; i++)
|
||
|
|
for(PxU32 j=0; j < 5; j++)
|
||
|
|
for(PxU32 k=0; k < 5; k++)
|
||
|
|
mLevel3[i][j].mBucketBox[k].mCenter -= shift;
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
///////////////////////////////////////////////////////////////////////////////
|
||
|
|
|
||
|
|
// Renders one bucket box as a debug box, converting it from center/extents to min/max form.
static void visualize(PxRenderOutput& out, const BucketBox& bounds)
{
	const PxBounds3 minMaxBox(bounds.getMin(), bounds.getMax());
	Cm::renderOutputDebugBox(out, minMaxBox);
}
|
||
|
|
|
||
|
|
// Debug rendering: draws the global box, then the box of every non-empty bucket of the three
// levels, with an identity transform and the given color.
void BucketPrunerCore::visualize(PxRenderOutput& out, PxU32 color) const
{
	const PxTransform identity = PxTransform(PxIdentity);
	out << identity;
	out << color;

	::visualize(out, mGlobalBox);

	for(PxU32 i=0; i<5; i++)
	{
		if(mLevel1.mCounters[i])
		{
			::visualize(out, mLevel1.mBucketBox[i]);

			for(PxU32 j=0; j<5; j++)
			{
				if(mLevel2[i].mCounters[j])
				{
					::visualize(out, mLevel2[i].mBucketBox[j]);

					for(PxU32 k=0; k<5; k++)
					{
						if(mLevel3[i][j].mCounters[k])
							::visualize(out, mLevel3[i][j].mBucketBox[k]);
					}
				}
			}
		}
	}
}
|
||
|
|
|
||
|
|
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||
|
|
|
||
|
|
// Builds the pruner's pool from the context ID, using the TRANSFORM_CACHE_GLOBAL mode.
BucketPruner::BucketPruner(PxU64 contextID) :
	mPool	(contextID, TRANSFORM_CACHE_GLOBAL)
{}
|
||
|
|
|
||
|
|
// Nothing to release explicitly here.
BucketPruner::~BucketPruner()
{}
|
||
|
|
|
||
|
|
// Flags the core as dirty and points it at the pool's current object count, boxes, payloads and transforms.
static PX_FORCE_INLINE void setExternalMemory(BucketPrunerCore& core, PruningPool& pool)
{
	core.mDirty = true;

	core.setExternalMemory(	pool.getNbActiveObjects(),
							pool.getCurrentWorldBoxes(),
							pool.getObjects(),
							pool.getTransforms());
}
|
||
|
|
|
||
|
|
// Adds 'count' objects to the pool and re-binds the core to the pool's memory.
// Returns true when count is zero or when the pool reports that all 'count' objects were added.
bool BucketPruner::addObjects(PrunerHandle* results, const PxBounds3* bounds, const PrunerPayload* data, const PxTransform* transforms, PxU32 count, bool)
{
	if(count==0)
		return true;

	const bool allAdded = mPool.addObjects(results, bounds, data, transforms, count) == count;

	// The pool's arrays may have changed: hand them to the core again (this marks it dirty).
	::setExternalMemory(mCore, mPool);

	return allAdded;
}
|
||
|
|
|
||
|
|
void BucketPruner::removeObjects(const PrunerHandle* handles, PxU32 count, PrunerPayloadRemovalCallback* removalCallback)
|
||
|
|
{
|
||
|
|
if(!count)
|
||
|
|
return;
|
||
|
|
|
||
|
|
for(PxU32 i=0;i<count;i++)
|
||
|
|
mPool.removeObject(handles[i], removalCallback);
|
||
|
|
|
||
|
|
::setExternalMemory(mCore, mPool);
|
||
|
|
}
|
||
|
|
|
||
|
|
void BucketPruner::updateObjects(const PrunerHandle* handles, PxU32 count, float inflation, const PxU32* boundsIndices, const PxBounds3* newBounds, const PxTransform32* newTransforms)
|
||
|
|
{
|
||
|
|
if(!count)
|
||
|
|
return;
|
||
|
|
|
||
|
|
if(handles && boundsIndices && newBounds)
|
||
|
|
mPool.updateAndInflateBounds(handles, boundsIndices, newBounds, newTransforms, count, inflation);
|
||
|
|
|
||
|
|
::setExternalMemory(mCore, mPool);
|
||
|
|
}
|
||
|
|
|
||
|
|
// Intentionally empty: no work is performed here.
void BucketPruner::purge()
{}
|
||
|
|
|
||
|
|
void BucketPruner::commit()
|
||
|
|
{
|
||
|
|
mCore.build();
|
||
|
|
}
|
||
|
|
|
||
|
|
// Merge is not implemented for the bucket pruner: the argument is ignored.
void BucketPruner::merge(const void*)
{}
|
||
|
|
|
||
|
|
void BucketPruner::shiftOrigin(const PxVec3& shift)
|
||
|
|
{
|
||
|
|
mCore.shiftOrigin(shift);
|
||
|
|
}
|
||
|
|
|
||
|
|
bool BucketPruner::sweep(const ShapeData& queryVolume, const PxVec3& unitDir, PxReal& inOutDistance, PrunerRaycastCallback& pcb) const
|
||
|
|
{
|
||
|
|
PX_ASSERT(!mCore.mDirty);
|
||
|
|
if(mCore.mDirty)
|
||
|
|
return true; // it may crash otherwise
|
||
|
|
return mCore.sweep(queryVolume, unitDir, inOutDistance, pcb);
|
||
|
|
}
|
||
|
|
|
||
|
|
bool BucketPruner::overlap(const ShapeData& queryVolume, PrunerOverlapCallback& pcb) const
|
||
|
|
{
|
||
|
|
PX_ASSERT(!mCore.mDirty);
|
||
|
|
if(mCore.mDirty)
|
||
|
|
return true; // it may crash otherwise
|
||
|
|
return mCore.overlap(queryVolume, pcb);
|
||
|
|
}
|
||
|
|
|
||
|
|
bool BucketPruner::raycast(const PxVec3& origin, const PxVec3& unitDir, PxReal& inOutDistance, PrunerRaycastCallback& pcb) const
|
||
|
|
{
|
||
|
|
PX_ASSERT(!mCore.mDirty);
|
||
|
|
if(mCore.mDirty)
|
||
|
|
return true; // it may crash otherwise
|
||
|
|
return mCore.raycast(origin, unitDir, inOutDistance, pcb);
|
||
|
|
}
|
||
|
|
|
||
|
|
// Debug visualization, delegated to the core. Only the primary color is used;
// the secondary color is ignored.
void BucketPruner::visualize(PxRenderOutput& out, PxU32 primaryColor, PxU32 /*secondaryColor*/) const
{
	mCore.visualize(out, primaryColor);
}
|
||
|
|
|
||
|
|
// Writes the core's global bounds into 'bounds'.
void BucketPruner::getGlobalBounds(PxBounds3& bounds) const
{
	mCore.getGlobalBounds(bounds);
}
|
||
|
|
|
||
|
|
// Allocation helpers used by the hash map below. Both allocation macros forward to
// PX_ALLOC with the "BucketPruner" name; MBP_FREE forwards to PX_FREE.
#define MBP_ALLOC(x)		PX_ALLOC(x, "BucketPruner")
#define MBP_ALLOC_TMP(x)	PX_ALLOC(x, "BucketPruner")
#define MBP_FREE(x)			PX_FREE(x)
// Sentinel index: marks an empty hash bucket and the end of a collision chain.
#define INVALID_ID			0xffffffff
|
||
|
|
|
||
|
|
#ifndef USE_REGULAR_HASH_MAP
|
||
|
|
// Returns true when the pair 'p' does NOT hold the payload 'data'.
// Expressed through operator== only, as in the rest of the map code.
static PX_FORCE_INLINE bool differentPair(const BucketPrunerPair& p, const PrunerPayload& data)
{
	return !(p.mData == data);
}
|
||
|
|
|
||
|
|
///////////////////////////////////////////////////////////////////////////////
|
||
|
|
|
||
|
|
// Builds an empty map: zero buckets, zero pairs, no arrays allocated, no memory reserved.
BucketPrunerMap::BucketPrunerMap()
	: mHashSize(0)
	, mMask(0)
	, mNbActivePairs(0)
	, mHashTable(NULL)
	, mNext(NULL)
	, mActivePairs(NULL)
	, mReservedMemory(0)
{
}
|
||
|
|
|
||
|
|
///////////////////////////////////////////////////////////////////////////////
|
||
|
|
|
||
|
|
// Releases everything the map owns by delegating to purge().
BucketPrunerMap::~BucketPrunerMap()
{
	purge();
}
|
||
|
|
|
||
|
|
///////////////////////////////////////////////////////////////////////////////
|
||
|
|
|
||
|
|
void BucketPrunerMap::purge()
|
||
|
|
{
|
||
|
|
MBP_FREE(mNext);
|
||
|
|
MBP_FREE(mActivePairs);
|
||
|
|
MBP_FREE(mHashTable);
|
||
|
|
mHashSize = 0;
|
||
|
|
mMask = 0;
|
||
|
|
mNbActivePairs = 0;
|
||
|
|
}
|
||
|
|
|
||
|
|
///////////////////////////////////////////////////////////////////////////////
|
||
|
|
|
||
|
|
// Looks up the pair holding 'payload'.
// Returns a pointer into mActivePairs on success, or NULL when the payload is not in the
// map (including the case where the map has not allocated anything yet).
const BucketPrunerPair* BucketPrunerMap::findPair(const PrunerPayload& payload) const
{
	// Nothing has been allocated yet => nothing can be found
	if(!mHashTable)
		return NULL;

	const BucketPrunerPair* PX_RESTRICT activePairs = mActivePairs;
	const PxU32* PX_RESTRICT next = mNext;

	// Bucket for this payload
	const PxU32 hashValue = PxComputeHash(payload) & mMask;

	// Follow the bucket's collision chain until the payload shows up or the chain ends
	for(PxU32 offset = mHashTable[hashValue]; offset!=INVALID_ID; offset = next[offset])
	{
		if(!differentPair(activePairs[offset], payload))
		{
			PX_ASSERT(offset<mNbActivePairs);
			// Match mActivePairs[offset] => the pair is persistent
			return &activePairs[offset];
		}
	}

	return NULL;
}
|
||
|
|
|
||
|
|
// Internal version saving hash computation
|
||
|
|
PX_FORCE_INLINE BucketPrunerPair* BucketPrunerMap::findPair(const PrunerPayload& payload, PxU32 hashValue) const
|
||
|
|
{
|
||
|
|
if(!mHashTable)
|
||
|
|
return NULL; // Nothing has been allocated yet
|
||
|
|
|
||
|
|
BucketPrunerPair* PX_RESTRICT activePairs = mActivePairs;
|
||
|
|
const PxU32* PX_RESTRICT next = mNext;
|
||
|
|
|
||
|
|
// Look for it in the table
|
||
|
|
PxU32 offset = mHashTable[hashValue];
|
||
|
|
while(offset!=INVALID_ID && differentPair(activePairs[offset], payload))
|
||
|
|
{
|
||
|
|
offset = next[offset]; // Better to have a separate array for this
|
||
|
|
}
|
||
|
|
if(offset==INVALID_ID)
|
||
|
|
return NULL;
|
||
|
|
PX_ASSERT(offset<mNbActivePairs);
|
||
|
|
// Match mActivePairs[offset] => the pair is persistent
|
||
|
|
return &activePairs[offset];
|
||
|
|
}
|
||
|
|
|
||
|
|
///////////////////////////////////////////////////////////////////////////////
|
||
|
|
|
||
|
|
// Inserts 'payload' with its associated 'coreIndex' and 'timeStamp'.
// If the payload is already present, the existing pair is returned unchanged (debug builds
// assert that its stored core index and timestamp match the arguments).
// Otherwise the pair is appended to mActivePairs, growing and rehashing the table first
// when it is full, and linked at the head of its bucket's chain.
// Returns a pointer to the (existing or new) pair inside mActivePairs.
BucketPrunerPair* BucketPrunerMap::addPair(const PrunerPayload& payload, PxU32 coreIndex, PxU32 timeStamp)
{
	// Hash the payload exactly once. Growing the table only changes the mask,
	// so the unmasked value can be reused after a reallocation.
	const PxU32 fullHash = PxComputeHash(payload);
	PxU32 hashValue = fullHash & mMask;

	{
		BucketPrunerPair* PX_RESTRICT p = findPair(payload, hashValue);
		if(p)
		{
			PX_ASSERT(p->mCoreIndex==coreIndex);
			PX_ASSERT(p->mTimeStamp==timeStamp);
			return p;	// Persistent pair
		}
	}

	// This is a new pair
	if(mNbActivePairs >= mHashSize)
	{
		// Get more entries
		mHashSize = PxNextPowerOfTwo(mNbActivePairs+1);
		mMask = mHashSize-1;

		reallocPairs();

		// Re-derive the bucket with the new mask (no need to hash again)
		hashValue = fullHash & mMask;
	}

	// Append the pair and push it at the front of its bucket's chain
	BucketPrunerPair* PX_RESTRICT p = &mActivePairs[mNbActivePairs];
	p->mData		= payload;
	p->mCoreIndex	= coreIndex;
	p->mTimeStamp	= timeStamp;
	mNext[mNbActivePairs] = mHashTable[hashValue];
	mHashTable[hashValue] = mNbActivePairs++;
	return p;
}
|
||
|
|
|
||
|
|
///////////////////////////////////////////////////////////////////////////////
|
||
|
|
|
||
|
|
void BucketPrunerMap::removePairInternal(const PrunerPayload& /*payload*/, PxU32 hashValue, PxU32 pairIndex)
|
||
|
|
{
|
||
|
|
// Walk the hash table to fix mNext
|
||
|
|
{
|
||
|
|
PxU32 offset = mHashTable[hashValue];
|
||
|
|
PX_ASSERT(offset!=INVALID_ID);
|
||
|
|
|
||
|
|
PxU32 previous=INVALID_ID;
|
||
|
|
while(offset!=pairIndex)
|
||
|
|
{
|
||
|
|
previous = offset;
|
||
|
|
offset = mNext[offset];
|
||
|
|
}
|
||
|
|
|
||
|
|
// Let us go/jump us
|
||
|
|
if(previous!=INVALID_ID)
|
||
|
|
{
|
||
|
|
PX_ASSERT(mNext[previous]==pairIndex);
|
||
|
|
mNext[previous] = mNext[pairIndex];
|
||
|
|
}
|
||
|
|
// else we were the first
|
||
|
|
else mHashTable[hashValue] = mNext[pairIndex];
|
||
|
|
// we're now free to reuse mNext[pairIndex] without breaking the list
|
||
|
|
}
|
||
|
|
#if PX_DEBUG
|
||
|
|
mNext[pairIndex]=INVALID_ID;
|
||
|
|
#endif
|
||
|
|
// Invalidate entry
|
||
|
|
|
||
|
|
// Fill holes
|
||
|
|
if(1)
|
||
|
|
{
|
||
|
|
// 1) Remove last pair
|
||
|
|
const PxU32 lastPairIndex = mNbActivePairs-1;
|
||
|
|
if(lastPairIndex==pairIndex)
|
||
|
|
{
|
||
|
|
mNbActivePairs--;
|
||
|
|
}
|
||
|
|
else
|
||
|
|
{
|
||
|
|
const BucketPrunerPair* last = &mActivePairs[lastPairIndex];
|
||
|
|
const PxU32 lastHashValue = PxComputeHash(last->mData) & mMask;
|
||
|
|
|
||
|
|
// Walk the hash table to fix mNext
|
||
|
|
PxU32 offset = mHashTable[lastHashValue];
|
||
|
|
PX_ASSERT(offset!=INVALID_ID);
|
||
|
|
|
||
|
|
PxU32 previous=INVALID_ID;
|
||
|
|
while(offset!=lastPairIndex)
|
||
|
|
{
|
||
|
|
previous = offset;
|
||
|
|
offset = mNext[offset];
|
||
|
|
}
|
||
|
|
|
||
|
|
// Let us go/jump us
|
||
|
|
if(previous!=INVALID_ID)
|
||
|
|
{
|
||
|
|
PX_ASSERT(mNext[previous]==lastPairIndex);
|
||
|
|
mNext[previous] = mNext[lastPairIndex];
|
||
|
|
}
|
||
|
|
// else we were the first
|
||
|
|
else mHashTable[lastHashValue] = mNext[lastPairIndex];
|
||
|
|
// we're now free to reuse mNext[lastPairIndex] without breaking the list
|
||
|
|
|
||
|
|
#if PX_DEBUG
|
||
|
|
mNext[lastPairIndex]=INVALID_ID;
|
||
|
|
#endif
|
||
|
|
|
||
|
|
// Don't invalidate entry since we're going to shrink the array
|
||
|
|
|
||
|
|
// 2) Re-insert in free slot
|
||
|
|
mActivePairs[pairIndex] = mActivePairs[lastPairIndex];
|
||
|
|
#if PX_DEBUG
|
||
|
|
PX_ASSERT(mNext[pairIndex]==INVALID_ID);
|
||
|
|
#endif
|
||
|
|
mNext[pairIndex] = mHashTable[lastHashValue];
|
||
|
|
mHashTable[lastHashValue] = pairIndex;
|
||
|
|
|
||
|
|
mNbActivePairs--;
|
||
|
|
}
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
///////////////////////////////////////////////////////////////////////////////
|
||
|
|
|
||
|
|
bool BucketPrunerMap::removePair(const PrunerPayload& payload, PxU32& coreIndex, PxU32& timeStamp)
|
||
|
|
{
|
||
|
|
const PxU32 hashValue = PxComputeHash(payload) & mMask;
|
||
|
|
const BucketPrunerPair* p = findPair(payload, hashValue);
|
||
|
|
if(!p)
|
||
|
|
return false;
|
||
|
|
PX_ASSERT(p->mData==payload);
|
||
|
|
|
||
|
|
coreIndex = p->mCoreIndex;
|
||
|
|
timeStamp = p->mTimeStamp;
|
||
|
|
|
||
|
|
removePairInternal(payload, hashValue, getPairIndex(p));
|
||
|
|
|
||
|
|
shrinkMemory();
|
||
|
|
return true;
|
||
|
|
}
|
||
|
|
|
||
|
|
///////////////////////////////////////////////////////////////////////////////
|
||
|
|
|
||
|
|
void BucketPrunerMap::shrinkMemory()
|
||
|
|
{
|
||
|
|
// Check correct memory against actually used memory
|
||
|
|
const PxU32 correctHashSize = PxNextPowerOfTwo(mNbActivePairs);
|
||
|
|
if(mHashSize==correctHashSize)
|
||
|
|
return;
|
||
|
|
|
||
|
|
if(mReservedMemory && correctHashSize < mReservedMemory)
|
||
|
|
return;
|
||
|
|
|
||
|
|
// Reduce memory used
|
||
|
|
mHashSize = correctHashSize;
|
||
|
|
mMask = mHashSize-1;
|
||
|
|
|
||
|
|
reallocPairs();
|
||
|
|
}
|
||
|
|
|
||
|
|
///////////////////////////////////////////////////////////////////////////////
|
||
|
|
|
||
|
|
// Fills the first 'nb' entries of 'dest' with 'value'.
static PX_FORCE_INLINE void storeDwords(PxU32* dest, PxU32 nb, PxU32 value)
{
	for(PxU32 i=0;i<nb;i++)
		dest[i] = value;
}
|
||
|
|
|
||
|
|
void BucketPrunerMap::reallocPairs()
|
||
|
|
{
|
||
|
|
MBP_FREE(mHashTable);
|
||
|
|
mHashTable = reinterpret_cast<PxU32*>(MBP_ALLOC(mHashSize*sizeof(PxU32)));
|
||
|
|
storeDwords(mHashTable, mHashSize, INVALID_ID);
|
||
|
|
|
||
|
|
// Get some bytes for new entries
|
||
|
|
BucketPrunerPair* newPairs = reinterpret_cast<BucketPrunerPair*>(MBP_ALLOC(mHashSize * sizeof(BucketPrunerPair)));
|
||
|
|
PX_ASSERT(newPairs);
|
||
|
|
|
||
|
|
PxU32* newNext = reinterpret_cast<PxU32*>(MBP_ALLOC(mHashSize * sizeof(PxU32)));
|
||
|
|
PX_ASSERT(newNext);
|
||
|
|
|
||
|
|
// Copy old data if needed
|
||
|
|
if(mNbActivePairs)
|
||
|
|
PxMemCopy(newPairs, mActivePairs, mNbActivePairs*sizeof(BucketPrunerPair));
|
||
|
|
// ### check it's actually needed... probably only for pairs whose hash value was cut by the and
|
||
|
|
// yeah, since hash(id0, id1) is a constant
|
||
|
|
// However it might not be needed to recompute them => only less efficient but still ok
|
||
|
|
for(PxU32 i=0;i<mNbActivePairs;i++)
|
||
|
|
{
|
||
|
|
const PxU32 hashValue = PxComputeHash(mActivePairs[i].mData) & mMask; // New hash value with new mask
|
||
|
|
newNext[i] = mHashTable[hashValue];
|
||
|
|
mHashTable[hashValue] = i;
|
||
|
|
}
|
||
|
|
|
||
|
|
// Delete old data
|
||
|
|
MBP_FREE(mNext);
|
||
|
|
MBP_FREE(mActivePairs);
|
||
|
|
|
||
|
|
// Assign new pointer
|
||
|
|
mActivePairs = newPairs;
|
||
|
|
mNext = newNext;
|
||
|
|
}
|
||
|
|
|
||
|
|
///////////////////////////////////////////////////////////////////////////////
|
||
|
|
|
||
|
|
// Reserves room for 'memSize' entries. The request is rounded up to a power of two when it
// is not one already, and is recorded in mReservedMemory so that shrinkMemory() does not
// shrink the table while the computed size is below the reservation.
// A request of zero is ignored.
// If the map already holds more pairs than requested, the table itself is sized to fit
// them instead (the reservation is still recorded as requested).
void BucketPrunerMap::reserveMemory(PxU32 memSize)
{
	if(!memSize)
		return;

	if(!PxIsPowerOfTwo(memSize))
		memSize = PxNextPowerOfTwo(memSize);

	mReservedMemory = memSize;

	// reallocPairs() copies mNbActivePairs entries into arrays of mHashSize entries, so the
	// table must never be made smaller than the live pair count. Fall back to the same
	// sizing rule shrinkMemory() uses in that case.
	PxU32 hashSize = memSize;
	if(hashSize < mNbActivePairs)
		hashSize = PxNextPowerOfTwo(mNbActivePairs);

	mHashSize = hashSize;
	mMask = mHashSize-1;

	reallocPairs();
}
|
||
|
|
|
||
|
|
///////////////////////////////////////////////////////////////////////////////
|
||
|
|
#endif
|