Files
XCEngine/engine/third_party/physx/source/lowlevelaabb/src/BpBroadPhaseABP.cpp

4341 lines
121 KiB
C++
Raw Normal View History

// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
#include "foundation/PxProfiler.h"
#include "foundation/PxMemory.h"
#include "foundation/PxBitUtils.h"
#include "foundation/PxFPU.h"
#include "BpBroadPhaseABP.h"
#include "BpBroadPhaseShared.h"
#include "foundation/PxVecMath.h"
#include "PxcScratchAllocator.h"
#include "common/PxProfileZone.h"
#include "CmRadixSort.h"
#include "CmUtils.h"
#include "GuBounds.h"
#include "foundation/PxThread.h"
#include "foundation/PxSync.h"
#include "task/PxTask.h"
using namespace physx;
using namespace aos;
using namespace Bp;
using namespace Cm;
/*
PT: to try:
- prepare data: sort & compute bounds in parallel? or just MT the last loop?
- switch post update & add delayed pairs?
- MT computeCreatedDeletedPairs
- why do we set the update flag for added/removed objects?
- use timestamps instead of bits?
*/
// Compile-time configuration for this file.
// ABP_MT: when defined, the DelayedPair struct and the delayed-pair entry points of ABP_PairManager are compiled in.
#define ABP_MT
// CHECKPOINT(x) expands to nothing; the commented-out variant below prints the label with printf for tracing.
#define CHECKPOINT(x)
//#include <stdio.h>
//#define CHECKPOINT(x) printf(x);
//#pragma warning (disable : 4702)
// CODEALIGN16 expands to nothing (the alignment directive is commented out).
#define CODEALIGN16 //_asm align 16
// ABP_SIMD_OVERLAP: enabled on Intel-family targets unless SIMD is disabled. When defined, the YZ box records
// store their minimum Y/Z values negated (see SIMD_AABB_YZ4::initFrom).
#if PX_INTEL_FAMILY && !defined(PX_SIMD_DISABLED)
#define ABP_SIMD_OVERLAP
#endif
//#define ABP_BATCHING 128
#define ABP_BATCHING 256
//#define USE_ABP_BUCKETS 5000 // PT: don't use buckets below that number...
#define USE_ABP_BUCKETS 512 // PT: don't use buckets below that number...
//#define USE_ABP_BUCKETS 64 // PT: don't use buckets below that number...
// With buckets enabled, SIMD_AABB4 keeps float X values and only the split X records (SIMD_AABB_X4) are
// integer-encoded (ABP_USE_INTEGER_XS2). Without buckets, SIMD_AABB4 itself uses integer X values (ABP_USE_INTEGER_XS).
#ifdef USE_ABP_BUCKETS
#define NB_BUCKETS 5
// Regular version: 5 buckets a la bucket pruner (4 + cross bucket)
// Alternative version: 4 buckets + dup objects a la MBP regions
// #define USE_ALTERNATIVE_VERSION
#define ABP_USE_INTEGER_XS2 // Works but questionable speedups
#else
#define ABP_USE_INTEGER_XS
#endif
// Number of sentinel X records written past the end of a sorted box array (see initSentinels) and
// reserved by the allocate() functions of the box containers.
#define NB_SENTINELS 6
//#define RECURSE_LIMIT 20000
// Index type used by the in-to-out remap tables.
typedef PxU32 ABP_Index;
// NOTE(review): not referenced in the visible part of the file - presumably toggles an overlap preparation step; confirm at the use site.
static const bool gPrepareOverlapsFlag = true;
// Kernel selection flags. With ABP_SIMD_OVERLAP the "version 13/14" kernels are selected (regular kernel off,
// unrolling on); otherwise the regular, non-unrolled kernel is used. The commented tables below are benchmark
// results recorded for the different flag combinations on the non-SIMD path.
#ifdef ABP_SIMD_OVERLAP
static const bool gUseRegularBPKernel = false; // false to use "version 13" in box pruning series
static const bool gUnrollLoop = true; // true to use "version 14" in box pruning series
#else
// PT: tested on Switch, for some reason the regular version is fastest there
static const bool gUseRegularBPKernel = true; // false to use "version 13" in box pruning series
static const bool gUnrollLoop = false; // true to use "version 14" in box pruning series
//ABP_SIMD_OVERLAP
//MBP.Add64KObjects 13982 ( +0.0%) 4757795 ( +0.0%) FAIL
//MBP.AddBroadPhaseRegion 0 ( +0.0%) 3213795 ( +0.0%) FAIL
//MBP.FinalizeOverlaps64KObjects 507 ( +0.0%) 5650723 ( +0.0%) FAIL
//MBP.FindOverlaps64KMixedObjects 59258 ( +0.0%) 5170179 ( +0.0%) FAIL
//MBP.FindOverlaps64KObjects 31351 ( +0.0%) 7122019 ( +0.0%) FAIL
//MBP.Remove64KObjects 4993 ( +0.0%) 5281683 ( +0.0%) FAIL
//MBP.Update64KObjects 13711 ( +0.0%) 5521699 ( +0.0%) FAIL
//gUseRegularBPKernel:
//MBP.Add64KObjects 14406 ( +0.0%) 4757795 ( +0.0%) FAIL
//MBP.AddBroadPhaseRegion 0 ( +0.0%) 3213795 ( +0.0%) FAIL
//MBP.FinalizeOverlaps64KObjects 504 ( +0.0%) 5650723 ( +0.0%) FAIL
//MBP.FindOverlaps64KMixedObjects 48929 ( +0.0%) 5170179 ( +0.0%) FAIL
//MBP.FindOverlaps64KObjects 25636 ( +0.0%) 7122019 ( +0.0%) FAIL
//MBP.Remove64KObjects 4878 ( +0.0%) 5281683 ( +0.0%) FAIL
//MBP.Update64KObjects 13932 ( +0.0%) 5521699 ( +0.0%) FAIL
// false/true
//MBP.Add64KObjects 14278 ( +0.0%) 4757795 ( +0.0%) FAIL
//MBP.AddBroadPhaseRegion 0 ( +0.0%) 3213795 ( +0.0%) FAIL
//MBP.FinalizeOverlaps64KObjects 504 ( +0.0%) 5650723 ( +0.0%) FAIL
//MBP.FindOverlaps64KMixedObjects 60331 ( +0.0%) 5170179 ( +0.0%) FAIL
//MBP.FindOverlaps64KObjects 32064 ( +0.0%) 7122019 ( +0.0%) FAIL
//MBP.Remove64KObjects 4930 ( +0.0%) 5281683 ( +0.0%) FAIL
//MBP.Update64KObjects 13673 ( +0.0%) 5521699 ( +0.0%) FAIL
// false/false
//MBP.Add64KObjects 13960 ( +0.0%) 4757795 ( +0.0%) FAIL
//MBP.AddBroadPhaseRegion 0 ( +0.0%) 3213795 ( +0.0%) FAIL
//MBP.FinalizeOverlaps64KObjects 503 ( +0.0%) 5650723 ( +0.0%) FAIL
//MBP.FindOverlaps64KMixedObjects 48549 ( +0.0%) 5170179 ( +0.0%) FAIL
//MBP.FindOverlaps64KObjects 25598 ( +0.0%) 7122019 ( +0.0%) FAIL
//MBP.Remove64KObjects 4883 ( +0.0%) 5281683 ( +0.0%) FAIL
//MBP.Update64KObjects 13667 ( +0.0%) 5521699 ( +0.0%) FAIL
#endif
// PosXType: type of the X extents stored in SIMD_AABB4, together with the value that marks a sentinel box.
// Integer (encoded float) when ABP_USE_INTEGER_XS is defined, plain float otherwise.
#ifdef ABP_USE_INTEGER_XS
typedef PxU32 PosXType;
#define SentinelValue 0xffffffff
#else
typedef float PosXType;
#define SentinelValue FLT_MAX
#endif
// PosXType2: same idea for the split X records (SIMD_AABB_X4). It is an integer type when either
// ABP_USE_INTEGER_XS2 or ABP_USE_INTEGER_XS is defined, and float only when neither is.
#ifdef ABP_USE_INTEGER_XS2
typedef PxU32 PosXType2;
#define SentinelValue2 0xffffffff
#else
#ifdef ABP_USE_INTEGER_XS
typedef PxU32 PosXType2;
#define SentinelValue2 0xffffffff
#else
typedef float PosXType2;
#define SentinelValue2 FLT_MAX
#endif
#endif
namespace internalABP
{
// Unsplit box record holding the six extents of one AABB. X extents use PosXType (integer-encoded when
// ABP_USE_INTEGER_XS is defined); Y/Z extents are always floats. A box whose mMinX equals SentinelValue
// is treated as a sentinel.
struct SIMD_AABB4 : public PxUserAllocated
{
	// Copies the extents of 'box' into this record, encoding the X values when integer Xs are in use.
	PX_FORCE_INLINE void initFrom2(const PxBounds3& box)
	{
#ifdef ABP_USE_INTEGER_XS
		mMinY = box.minimum.y;
		mMaxY = box.maximum.y;
		mMinZ = box.minimum.z;
		mMaxZ = box.maximum.z;
		mMinX = encodeFloat(PX_IR(box.minimum.x));
		mMaxX = encodeFloat(PX_IR(box.maximum.x));
#else
		mMaxX = box.maximum.x;
		mMaxY = box.maximum.y;
		mMaxZ = box.maximum.z;
		mMinX = box.minimum.x;
		mMinY = box.minimum.y;
		mMinZ = box.minimum.z;
#endif
	}

	// Member-wise copy of the six extents.
	PX_FORCE_INLINE void operator = (const SIMD_AABB4& box)
	{
		mMaxZ = box.mMaxZ;
		mMaxY = box.mMaxY;
		mMaxX = box.mMaxX;
		mMinZ = box.mMinZ;
		mMinY = box.mMinY;
		mMinX = box.mMinX;
	}

	// Turns this record into a sentinel. Only mMinX is written.
	PX_FORCE_INLINE void initSentinel()
	{
		mMinX = SentinelValue;
	}

	// True when this record carries the sentinel marker in mMinX.
	PX_FORCE_INLINE bool isSentinel() const
	{
		const bool sentinel = (mMinX == SentinelValue);
		return sentinel;
	}

#ifdef USE_ABP_BUCKETS
	// PT: to be able to compute bounds easily
	// Layout with buckets: all minima first, then all maxima.
	PosXType	mMinX;
	float		mMinY;
	float		mMinZ;
	PosXType	mMaxX;
	float		mMaxY;
	float		mMaxZ;
#else
	// Layout without buckets: both X extents first, then the Y/Z extents.
	PosXType	mMinX;
	PosXType	mMaxX;
	float		mMinY;
	float		mMinZ;
	float		mMaxY;
	float		mMaxZ;
#endif
};
#define USE_SHARED_CLASSES
#ifdef USE_SHARED_CLASSES
// X-only box record built on the shared AABB_Xi class.
struct SIMD_AABB_X4 : public AABB_Xi
{
	// Takes the X extents from a full box: a direct copy when the split records are not integer-encoded,
	// otherwise through initFromFloats() which receives the addresses of the source extents.
	PX_FORCE_INLINE void initFrom(const SIMD_AABB4& box)
	{
#ifndef ABP_USE_INTEGER_XS2
		mMinX = box.mMinX;
		mMaxX = box.mMaxX;
#else
		initFromFloats(&box.mMinX, &box.mMaxX);
#endif
	}
};
// 16-byte aligned Y/Z box record. The base class depends on ABP_SIMD_OVERLAP: AABB_YZn when the SIMD overlap
// test is enabled (minima are stored negated), AABB_YZr otherwise (minima stored as-is).
PX_ALIGN_PREFIX(16)
#ifdef ABP_SIMD_OVERLAP
struct SIMD_AABB_YZ4 : AABB_YZn
{
	// Copies the Y/Z extents of 'box'. ABP_SIMD_OVERLAP is always defined on this branch, so the
	// minima are always negated here.
	PX_FORCE_INLINE void initFrom(const SIMD_AABB4& box)
	{
		mMaxY = box.mMaxY;
		mMaxZ = box.mMaxZ;
		mMinY = -box.mMinY;
		mMinZ = -box.mMinZ;
	}
}
#else
struct SIMD_AABB_YZ4 : AABB_YZr
{
	// Copies the Y/Z extents of 'box' unchanged.
	PX_FORCE_INLINE void initFrom(const SIMD_AABB4& box)
	{
		mMaxY = box.mMaxY;
		mMaxZ = box.mMaxZ;
		mMinY = box.mMinY;
		mMinZ = box.mMinZ;
	}
}
#endif
PX_ALIGN_SUFFIX(16);
#else
// Stand-alone X-only box record, used when USE_SHARED_CLASSES is not defined.
// A record whose mMinX equals SentinelValue2 is treated as a sentinel.
struct SIMD_AABB_X4 : public PxUserAllocated
{
	// Reads two 32-bit values through the given pointers and stores their encodeFloat() results.
	PX_FORCE_INLINE void initFromFloats(const void* PX_RESTRICT minX, const void* PX_RESTRICT maxX)
	{
		const PxU32* rawMin = reinterpret_cast<const PxU32*>(minX);
		const PxU32* rawMax = reinterpret_cast<const PxU32*>(maxX);
		mMinX = encodeFloat(*rawMin);
		mMaxX = encodeFloat(*rawMax);
	}

	// Takes the X extents from a full box, encoding them when ABP_USE_INTEGER_XS2 is defined.
	PX_FORCE_INLINE void initFrom(const SIMD_AABB4& box)
	{
#ifndef ABP_USE_INTEGER_XS2
		mMinX = box.mMinX;
		mMaxX = box.mMaxX;
#else
		initFromFloats(&box.mMinX, &box.mMaxX);
#endif
	}

	// Takes the X extents from a min/max vector pair. The values are encoded whenever either
	// integer-X option is defined, and copied as floats otherwise.
	PX_FORCE_INLINE void initFromPxVec4(const PxVec4& min, const PxVec4& max)
	{
#if defined(ABP_USE_INTEGER_XS2) || defined(ABP_USE_INTEGER_XS)
		initFromFloats(&min.x, &max.x);
#else
		mMinX = min.x;
		mMaxX = max.x;
#endif
	}

	// Member-wise copy of both X extents.
	PX_FORCE_INLINE void operator = (const SIMD_AABB_X4& box)
	{
		mMaxX = box.mMaxX;
		mMinX = box.mMinX;
	}

	// Turns this record into a sentinel. Only mMinX is written.
	PX_FORCE_INLINE void initSentinel()
	{
		mMinX = SentinelValue2;
	}

	// True when this record carries the sentinel marker in mMinX.
	PX_FORCE_INLINE bool isSentinel() const
	{
		const bool sentinel = (mMinX == SentinelValue2);
		return sentinel;
	}

	PosXType2	mMinX;
	PosXType2	mMaxX;
};
// Stand-alone Y/Z box record, used when USE_SHARED_CLASSES is not defined.
// With ABP_SIMD_OVERLAP the minima are stored negated; the maxima are always stored as-is.
struct SIMD_AABB_YZ4 : public PxUserAllocated
{
	// Takes the Y/Z extents from a full box.
	PX_FORCE_INLINE void initFrom(const SIMD_AABB4& box)
	{
		mMaxY = box.mMaxY;
		mMaxZ = box.mMaxZ;
#ifndef ABP_SIMD_OVERLAP
		mMinY = box.mMinY;
		mMinZ = box.mMinZ;
#else
		mMinY = -box.mMinY;
		mMinZ = -box.mMinZ;
#endif
	}

	// Takes the Y/Z extents from a min/max vector pair.
	PX_FORCE_INLINE void initFromPxVec4(const PxVec4& min, const PxVec4& max)
	{
		mMaxY = max.y;
		mMaxZ = max.z;
#ifndef ABP_SIMD_OVERLAP
		mMinY = min.y;
		mMinZ = min.z;
#else
		mMinY = -min.y;
		mMinZ = -min.z;
#endif
	}

	// Copies the whole record with one aligned vector load and one aligned vector store starting at mMinY.
	PX_FORCE_INLINE void operator = (const SIMD_AABB_YZ4& box)
	{
		V4StoreA(V4LoadA(&box.mMinY), &mMinY);
	}

	float	mMinY;
	float	mMinZ;
	float	mMaxY;
	float	mMaxZ;
};
#endif
// Allocation helpers: thin wrappers over PX_ALLOC / PX_FREE that tag allocations with the "MBP" / "MBP_TMP" names.
#define MBP_ALLOC(x) PX_ALLOC(x, "MBP")
#define MBP_ALLOC_TMP(x) PX_ALLOC(x, "MBP_TMP")
#define MBP_FREE(x) PX_FREE(x)
// Marker stored in remap tables and object indices to flag an invalid / removed entry.
#define INVALID_ID 0xffffffff
///////////////////////////////////////////////////////////////////////////////
// Initial capacity picked by the resize() functions of the box containers when they are empty.
#define DEFAULT_NB_ENTRIES 128
// Small memory helper: serves frame allocations from the scratch allocator when one has been set,
// and from the regular allocator otherwise. The scratch allocator pointer is not owned.
class ABP_MM
{
public:
	ABP_MM() : mScratchAllocator(NULL)
	{
	}
	~ABP_MM()
	{
	}

	void*	frameAlloc(PxU32 size);
	void	frameFree(void* address);

	PxcScratchAllocator*	mScratchAllocator;
};
// Allocates 'size' bytes: through PX_ALLOC when no scratch allocator is set, otherwise through the
// scratch allocator (called with 'true' as its second argument).
void* ABP_MM::frameAlloc(PxU32 size)
{
	if(!mScratchAllocator)
		return PX_ALLOC(size, "frameAlloc");

	return mScratchAllocator->alloc(size, true);
}
// Releases memory obtained from frameAlloc(), using the same allocator choice as frameAlloc().
void ABP_MM::frameFree(void* address)
{
	if(!mScratchAllocator)
	{
		PX_FREE(address);
		return;
	}

	mScratchAllocator->free(address);
}
// Allocates a new array of 'newNbBoxes' elements, copies the first 'oldNbBoxes' elements of 'boxes' into it
// with a raw memory copy, frees 'boxes' and returns the new array.
template<class T>
static T* resizeBoxesT(PxU32 oldNbBoxes, PxU32 newNbBoxes, T* boxes)
{
	T* grown = reinterpret_cast<T*>(MBP_ALLOC(sizeof(T)*newNbBoxes));

	if(oldNbBoxes != 0)
		PxMemCopy(grown, boxes, oldNbBoxes*sizeof(T));

	MBP_FREE(boxes);
	return grown;
}
// Size/capacity bookkeeping shared by the box containers. It owns no memory itself.
class Boxes
{
public:
	Boxes();
	~Boxes();

	// Copies the size and capacity of another container.
	PX_FORCE_INLINE void init(const Boxes& boxes)
	{
		mCapacity = boxes.mCapacity;
		mSize = boxes.mSize;
	}

	PX_FORCE_INLINE PxU32 getSize() const
	{
		return mSize;
	}

	PX_FORCE_INLINE PxU32 getCapacity() const
	{
		return mCapacity;
	}

	// True when no free slot is left.
	PX_FORCE_INLINE bool isFull() const
	{
		return mCapacity == mSize;
	}

	// Zeroes both counters.
	PX_FORCE_INLINE void reset()
	{
		mCapacity = 0;
		mSize = 0;
	}

	// Drops the last element and returns the new size, i.e. the index of the dropped element.
	PX_FORCE_INLINE PxU32 popBack()
	{
		mSize -= 1;
		return mSize;
	}

	// protected:
	PxU32	mSize;
	PxU32	mCapacity;
};
// Starts empty: zero size, zero capacity.
Boxes::Boxes() : mSize(0), mCapacity(0)
{
}
// Zeroes the counters on destruction.
Boxes::~Boxes()
{
	Boxes::reset();
}
// Container of full SIMD_AABB4 records stored in one contiguous array.
class StraightBoxes : public Boxes
{
public:
	StraightBoxes();
	~StraightBoxes();

	void	init(PxU32 size, PxU32 capacity, SIMD_AABB4* boxes);
	void	reset();
	PxU32	resize();
	PxU32	resize(PxU32 incoming);
	bool	allocate(PxU32 nb);

	PX_FORCE_INLINE const SIMD_AABB4* getBoxes() const
	{
		return mBoxes;
	}

	PX_FORCE_INLINE SIMD_AABB4* getBoxes()
	{
		return mBoxes;
	}

	// Overwrites the box stored at 'index', which must be below the current size.
	PX_FORCE_INLINE void setBounds(PxU32 index, const SIMD_AABB4& box)
	{
		PX_ASSERT(index<mSize);
		mBoxes[index] = box;
	}

	// Appends a box and returns its index. Capacity is not checked here.
	PX_FORCE_INLINE PxU32 pushBack(const SIMD_AABB4& box)
	{
		const PxU32 slot = mSize;
		mSize = slot + 1;
		setBounds(slot, box);
		return slot;
	}

private:
	SIMD_AABB4*	mBoxes;
};
// Starts without a buffer.
StraightBoxes::StraightBoxes() : mBoxes(NULL)
{
}
// Releases the box array and zeroes the counters.
StraightBoxes::~StraightBoxes()
{
	StraightBoxes::reset();
}
// Deletes the box array (via PX_DELETE_ARRAY), then zeroes size and capacity.
void StraightBoxes::reset()
{
	PX_DELETE_ARRAY(mBoxes);

	Boxes::reset();
}
// Drops the current content, then adopts the given array together with its size and capacity.
void StraightBoxes::init(PxU32 size, PxU32 capacity, SIMD_AABB4* boxes)
{
	reset();

	mBoxes = boxes;
	mCapacity = capacity;
	mSize = size;
}
// Grows the array: capacity doubles, or becomes DEFAULT_NB_ENTRIES when the container had no capacity.
// The first mSize boxes are preserved. Returns the new capacity.
PxU32 StraightBoxes::resize()
{
	PxU32 grownCapacity = DEFAULT_NB_ENTRIES;
	if(mCapacity)
		grownCapacity = mCapacity*2;

	// PT: we allocate one extra box for safe SIMD loads
	mBoxes = resizeBoxesT(mSize, grownCapacity+1, mBoxes);

	mCapacity = grownCapacity;
	return grownCapacity;
}
// Makes room for 'incoming' more boxes. Nothing happens when size+incoming is strictly below the current
// capacity; otherwise capacity becomes the larger of the doubled capacity (DEFAULT_NB_ENTRIES when empty)
// and size+incoming. Returns the resulting capacity.
PxU32 StraightBoxes::resize(PxU32 incoming)
{
	const PxU32 required = mSize + incoming;
	if(required<mCapacity)
		return mCapacity;

	const PxU32 doubled = mCapacity ? mCapacity*2 : DEFAULT_NB_ENTRIES;
	const PxU32 grownCapacity = doubled<required ? required : doubled;

	// PT: we allocate one extra box for safe SIMD loads
	mBoxes = resizeBoxesT(mSize, grownCapacity+1, mBoxes);

	mCapacity = grownCapacity;
	return grownCapacity;
}
// Replaces the array with a fresh one of 'nb' boxes (previous content is discarded), but only when 'nb'
// is larger than the current size. Returns true when a new array was allocated.
bool StraightBoxes::allocate(PxU32 nb)
{
	if(nb>mSize)
	{
		PX_DELETE_ARRAY(mBoxes);

		// PT: we allocate NB_SENTINELS more boxes than necessary here so we don't need to allocate one more for SIMD-load safety
		mBoxes = PX_NEW(SIMD_AABB4)[nb+NB_SENTINELS];

		mCapacity = nb;
		mSize = nb;
		return true;
	}
	return false;
}
// Container storing boxes as two parallel arrays: X extents in one, Y/Z extents in the other.
class SplitBoxes : public Boxes
{
public:
	SplitBoxes();
	~SplitBoxes();

	void	init(PxU32 size, PxU32 capacity, SIMD_AABB_X4* boxes_X, SIMD_AABB_YZ4* boxes_YZ);
	void	init(const SplitBoxes& boxes);
	void	reset(bool freeMemory = true);
	PxU32	resize();
	PxU32	resize(PxU32 incoming);
	bool	allocate(PxU32 nb);

	PX_FORCE_INLINE const SIMD_AABB_X4* getBoxes_X() const
	{
		return mBoxes_X;
	}

	PX_FORCE_INLINE SIMD_AABB_X4* getBoxes_X()
	{
		return mBoxes_X;
	}

	PX_FORCE_INLINE const SIMD_AABB_YZ4* getBoxes_YZ() const
	{
		return mBoxes_YZ;
	}

	PX_FORCE_INLINE SIMD_AABB_YZ4* getBoxes_YZ()
	{
		return mBoxes_YZ;
	}

	// Writes both halves of the box at 'index' from a min/max vector pair.
	PX_FORCE_INLINE void setBounds(PxU32 index, const PxVec4& min, const PxVec4& max)
	{
		PX_ASSERT(index<mSize);
		SIMD_AABB_X4& dstX = mBoxes_X[index];
		SIMD_AABB_YZ4& dstYZ = mBoxes_YZ[index];
		dstX.initFromPxVec4(min, max);
		dstYZ.initFromPxVec4(min, max);
	}

	// Writes both halves of the box at 'index' from a full box record.
	PX_FORCE_INLINE void setBounds(PxU32 index, const SIMD_AABB4& box)
	{
		PX_ASSERT(index<mSize);
		SIMD_AABB_X4& dstX = mBoxes_X[index];
		SIMD_AABB_YZ4& dstYZ = mBoxes_YZ[index];
		dstX.initFrom(box);
		dstYZ.initFrom(box);
	}

	// Appends a box and returns its index. Capacity is not checked here.
	PX_FORCE_INLINE PxU32 pushBack(const SIMD_AABB4& box)
	{
		const PxU32 slot = mSize;
		mSize = slot + 1;
		setBounds(slot, box);
		return slot;
	}

private:
	SIMD_AABB_X4*	mBoxes_X;
	SIMD_AABB_YZ4*	mBoxes_YZ;
};
// Starts without buffers.
SplitBoxes::SplitBoxes() : mBoxes_X(NULL), mBoxes_YZ(NULL)
{
}
// Frees both arrays (reset() defaults to freeing memory) and zeroes the counters.
SplitBoxes::~SplitBoxes()
{
	SplitBoxes::reset();
}
// Forgets both arrays and zeroes the counters. The arrays are only freed when 'freeMemory' is true;
// passing false just drops the pointers.
void SplitBoxes::reset(bool freeMemory)
{
	if(freeMemory)
	{
		MBP_FREE(mBoxes_YZ);
		MBP_FREE(mBoxes_X);
	}

	mBoxes_YZ = NULL;
	mBoxes_X = NULL;

	Boxes::reset();
}
// Frees the current content, then adopts the given arrays together with their size and capacity.
void SplitBoxes::init(PxU32 size, PxU32 capacity, SIMD_AABB_X4* boxes_X, SIMD_AABB_YZ4* boxes_YZ)
{
	reset();

	mBoxes_YZ = boxes_YZ;
	mBoxes_X = boxes_X;
	mCapacity = capacity;
	mSize = size;
}
// Frees the current content, then takes over the counters and the array pointers of 'boxes'.
// No data is copied: after this call both containers point to the same arrays.
void SplitBoxes::init(const SplitBoxes& boxes)
{
	reset();

	mBoxes_YZ = const_cast<SIMD_AABB_YZ4*>(boxes.getBoxes_YZ());
	mBoxes_X = const_cast<SIMD_AABB_X4*>(boxes.getBoxes_X());
	Boxes::init(boxes);
}
// Grows both arrays: capacity doubles, or becomes DEFAULT_NB_ENTRIES when the container had no capacity.
// The first mSize entries of each array are preserved. Returns the new capacity.
PxU32 SplitBoxes::resize()
{
	PxU32 grownCapacity = DEFAULT_NB_ENTRIES;
	if(mCapacity)
		grownCapacity = mCapacity*2;

	const PxU32 nbToKeep = mSize;
	mBoxes_X = resizeBoxesT(nbToKeep, grownCapacity, mBoxes_X);
	mBoxes_YZ = resizeBoxesT(nbToKeep, grownCapacity, mBoxes_YZ);

	mCapacity = grownCapacity;
	return grownCapacity;
}
// Makes room for 'incoming' more boxes. Nothing happens when size+incoming is strictly below the current
// capacity; otherwise capacity becomes the larger of the doubled capacity (DEFAULT_NB_ENTRIES when empty)
// and size+incoming. Returns the resulting capacity.
PxU32 SplitBoxes::resize(PxU32 incoming)
{
	const PxU32 nbToKeep = mSize;
	const PxU32 required = nbToKeep + incoming;
	if(required<mCapacity)
		return mCapacity;

	const PxU32 doubled = mCapacity ? mCapacity*2 : DEFAULT_NB_ENTRIES;
	const PxU32 grownCapacity = doubled<required ? required : doubled;

	mBoxes_X = resizeBoxesT(nbToKeep, grownCapacity, mBoxes_X);
	mBoxes_YZ = resizeBoxesT(nbToKeep, grownCapacity, mBoxes_YZ);

	mCapacity = grownCapacity;
	return grownCapacity;
}
// Replaces both arrays with fresh ones sized for 'nb' boxes (previous content is discarded), but only when
// 'nb' is larger than the current size. The X array gets NB_SENTINELS extra entries; the YZ array does not.
// Returns true when new arrays were allocated.
bool SplitBoxes::allocate(PxU32 nb)
{
	if(nb>mSize)
	{
		MBP_FREE(mBoxes_YZ);
		MBP_FREE(mBoxes_X);

		mBoxes_X = reinterpret_cast<SIMD_AABB_X4*>(MBP_ALLOC(sizeof(SIMD_AABB_X4)*(nb+NB_SENTINELS)));
		mBoxes_YZ = reinterpret_cast<SIMD_AABB_YZ4*>(MBP_ALLOC(sizeof(SIMD_AABB_YZ4)*nb));
		// The YZ records are expected to sit on a 16-byte boundary.
		PX_ASSERT(!(size_t(mBoxes_YZ) & 15));

		mCapacity = nb;
		mSize = nb;
		return true;
	}
	return false;
}
// Both aliases name the same split-array container; the two names only document intent at the use sites.
typedef SplitBoxes StaticBoxes;
typedef SplitBoxes DynamicBoxes;
///////////////////////////////////////////////////////////////////////////////
// Per-object record mapping a user handle to its slot in the internal box arrays.
// mIndex packs three things: the filter type in bits 0-1, a "sleeping" flag in bit 2, and the box index
// in the remaining upper bits. INVALID_ID marks an object without a slot.
struct ABP_Object : public PxUserAllocated
{
	PX_FORCE_INLINE ABP_Object() : mIndex(INVALID_ID)
	{
#if PX_DEBUG
		mUpdated = false;
#endif
	}

private:
	PxU32 mIndex; // Out-to-in, maps user handle to internal array. mIndex indexes either the static or dynamic array.

	// PT: the type won't be available for removed objects so we have to store it there. That uses 2 bits.
	// Stores 'index' (box index plus sleeping bit, as built by the public setters) above the 2 type bits.
	PX_FORCE_INLINE void setData(PxU32 index, FilterType::Enum type)
	{
		mIndex = (index<<2) | PxU32(type);
	}

public:
	// PT: TODO: rename "index" to data everywhere
	// Records the object as active at box slot 'index' (sleeping bit cleared).
	PX_FORCE_INLINE void setActiveIndex(PxU32 index, FilterType::Enum type)
	{
		setData(index<<1, type);
	}

	// Records the object as sleeping at box slot 'index' (sleeping bit set). The type must match the stored one.
	PX_FORCE_INLINE void setSleepingIndex(PxU32 index, FilterType::Enum type)
	{
		PX_ASSERT(getType()==type);
		setData((index<<1)|1, type);
	}

	// Filter type stored in the two low bits.
	PX_FORCE_INLINE FilterType::Enum getType() const
	{
		const PxU32 typeBits = mIndex & 3;
		return FilterType::Enum(typeBits);
	}

	// Box index and sleeping bit, i.e. everything above the type bits.
	PX_FORCE_INLINE PxU32 getData() const
	{
		return mIndex>>2;
	}

	PX_FORCE_INLINE void invalidateIndex()
	{
		mIndex = INVALID_ID;
	}

	PX_FORCE_INLINE bool isValid() const
	{
		return !(mIndex == INVALID_ID);
	}

#if PX_DEBUG
	bool mUpdated;
#endif
};
typedef ABP_Object ABPEntry;
///////////////////////////////////////////////////////////////////////////////
//#define BIT_ARRAY_STACK 512
// Number of 32-bit words needed to hold 'nbBits' bits (rounded up).
static PX_FORCE_INLINE PxU32 bitsToDwords(PxU32 nbBits)
{
	PxU32 nbDwords = nbBits>>5;
	if(nbBits&31)
		nbDwords++;
	return nbDwords;
}
// Growable bitmap stored as an array of 32-bit words. The plain accessors do no bounds checking;
// the "Checked" variants grow the array (or report 0 for reads) when the bit lies beyond the current size.
class BitArray
{
public:
	BitArray();
	BitArray(PxU32 nbBits);
	~BitArray();

	bool	init(PxU32 nbBits);
	void	empty();
	void	resize(PxU32 nbBits);

	// Grows the array when 'bitNumber' falls outside the currently allocated words.
	PX_FORCE_INLINE void checkResize(PxU32 bitNumber)
	{
		if((bitNumber>>5) >= mSize)
			resize(bitNumber);
	}

	// Sets a bit, growing the array first when needed.
	PX_FORCE_INLINE void setBitChecked(PxU32 bitNumber)
	{
		checkResize(bitNumber);
		setBit(bitNumber);
	}

	// Clears a bit, growing the array first when needed.
	PX_FORCE_INLINE void clearBitChecked(PxU32 bitNumber)
	{
		checkResize(bitNumber);
		clearBit(bitNumber);
	}

	// Data management (unchecked: the bit must lie within the allocated words)
	PX_FORCE_INLINE void setBit(PxU32 bitNumber)
	{
		mBits[bitNumber>>5] |= 1<<(bitNumber&31);
	}

	PX_FORCE_INLINE void clearBit(PxU32 bitNumber)
	{
		mBits[bitNumber>>5] &= ~(1<<(bitNumber&31));
	}

	PX_FORCE_INLINE void toggleBit(PxU32 bitNumber)
	{
		mBits[bitNumber>>5] ^= 1<<(bitNumber&31);
	}

	PX_FORCE_INLINE void clearAll()
	{
		PxMemZero(mBits, mSize*4);
	}

	PX_FORCE_INLINE void setAll()
	{
		PxMemSet(mBits, 0xff, mSize*4);
	}

	// Data access
	// Non-zero when the bit is set (unchecked).
	PX_FORCE_INLINE PxIntBool isSet(PxU32 bitNumber) const
	{
		const PxU32 word = mBits[bitNumber>>5];
		return PxIntBool(word & (1<<(bitNumber&31)));
	}

	// Same as isSet(), but returns 0 for bits beyond the allocated words.
	PX_FORCE_INLINE PxIntBool isSetChecked(PxU32 bitNumber) const
	{
		if((bitNumber>>5) < mSize)
			return isSet(bitNumber);
		return 0;
	}

	PX_FORCE_INLINE const PxU32* getBits() const
	{
		return mBits;
	}

	PX_FORCE_INLINE PxU32 getSize() const
	{
		return mSize;
	}

protected:
	PxU32*	mBits;	//!< Array of bits
	PxU32	mSize;	//!< Size of the array in dwords
#ifdef BIT_ARRAY_STACK
	PxU32	mStack[BIT_ARRAY_STACK];
#endif
};
///////////////////////////////////////////////////////////////////////////////
// Starts empty: no buffer, zero words.
BitArray::BitArray() :
	mBits	(NULL),
	mSize	(0)
{
}
// Starts with room for 'nbBits' bits, all cleared (see init()).
BitArray::BitArray(PxU32 nbBits) :
	mBits	(NULL),
	mSize	(0)
{
	init(nbBits);
}
// Releases the bit buffer.
BitArray::~BitArray()
{
	BitArray::empty();
}
// Releases the bit buffer and returns to the empty state. When the optional in-object stack buffer is
// compiled in (BIT_ARRAY_STACK), the free is skipped if that buffer is the one in use.
void BitArray::empty()
{
#ifdef BIT_ARRAY_STACK
	if(mBits!=mStack)
#endif
	{
		MBP_FREE(mBits);
	}

	mSize = 0;
	mBits = NULL;
}
// (Re)initializes the array so it can hold 'nbBits' bits, all cleared. Any previously held heap buffer is
// released and previous content is lost. Always returns true.
// BIT_ARRAY_STACK is not defined in this file (its #define is commented out), so the heap path is the one compiled.
bool BitArray::init(PxU32 nbBits)
{
mSize = bitsToDwords(nbBits);
// Get ram for n bits
#ifdef BIT_ARRAY_STACK
if(mBits!=mStack)
#endif
MBP_FREE(mBits);
#ifdef BIT_ARRAY_STACK
if(mSize>BIT_ARRAY_STACK)
#endif
// NOTE(review): with nbBits==0 this requests a zero-byte allocation - confirm the allocator accepts that.
mBits = reinterpret_cast<PxU32*>(MBP_ALLOC(sizeof(PxU32)*mSize));
#ifdef BIT_ARRAY_STACK
else
mBits = mStack;
#endif
// Set all bits to 0
clearAll();
return true;
}
// Grows the array so that bit 'nbBits' is addressable, keeping existing bits and clearing the new ones.
// The new size includes 128 bits of slack on top of 'nbBits' so that consecutive growth requests do not
// reallocate every time.
// NOTE(review): 'newSize - mSize' is unsigned, so this assumes the new size is never smaller than the current
// one. The visible callers only call resize() when the bit is beyond the current size - confirm for other callers.
void BitArray::resize(PxU32 nbBits)
{
const PxU32 newSize = bitsToDwords(nbBits+128);
PxU32* newBits = NULL;
#ifdef BIT_ARRAY_STACK
if(newSize>BIT_ARRAY_STACK)
#endif
{
// Old buffer was stack or allocated, new buffer is allocated
newBits = reinterpret_cast<PxU32*>(MBP_ALLOC(sizeof(PxU32)*newSize));
if(mSize)
PxMemCopy(newBits, mBits, sizeof(PxU32)*mSize);
}
#ifdef BIT_ARRAY_STACK
else
{
newBits = mStack;
if(mSize>BIT_ARRAY_STACK)
{
// Old buffer was allocated, new buffer is stack => copy to stack, shrink
// NOTE(review): this disabled branch uses CopyMemory instead of PxMemCopy - revisit if BIT_ARRAY_STACK is ever enabled.
CopyMemory(newBits, mBits, sizeof(PxU32)*BIT_ARRAY_STACK);
}
else
{
// Old buffer was stack, new buffer is stack => keep working on the same stack buffer, nothing to do
}
}
#endif
// Clear the words that were added beyond the previous size.
const PxU32 remain = newSize - mSize;
if(remain)
PxMemZero(newBits + mSize, remain*sizeof(PxU32));
#ifdef BIT_ARRAY_STACK
if(mBits!=mStack)
#endif
MBP_FREE(mBits);
mBits = newBits;
mSize = newSize;
}
///////////////////////////////////////////////////////////////////////////////
// Allocates a new remap table of 'newNbBoxes' entries, copies the first 'oldNbBoxes' entries of 'mapping'
// into it, frees 'mapping' and returns the new table.
static ABP_Index* resizeMapping(PxU32 oldNbBoxes, PxU32 newNbBoxes, ABP_Index* mapping)
{
	ABP_Index* grown = reinterpret_cast<ABP_Index*>(MBP_ALLOC(sizeof(ABP_Index)*newNbBoxes));

	if(oldNbBoxes != 0)
		PxMemCopy(grown, mapping, oldNbBoxes*sizeof(ABP_Index));

	MBP_FREE(mapping);
	return grown;
}
struct ABP_Object;
#ifdef ABP_MT
// Pair recorded for deferred insertion into the pair manager: two IDs and a hash value.
// NOTE(review): presumably mHash is precomputed from the two IDs - confirm where delayed pairs are created.
struct DelayedPair
{
	PxU32	mID0;
	PxU32	mID1;
	PxU32	mHash;
};
#endif
// Pair manager built on PairManagerData. Besides direct pair insertion it exposes, under ABP_MT, entry points
// to record pairs into a DelayedPair array and to add them later. The pointer members are not initialized or
// released in the visible code.
class ABP_PairManager : public PairManagerData
{
public:
	ABP_PairManager();
	~ABP_PairManager();

	InternalPair*	addPair(PxU32 id0, PxU32 id1);
	void			computeCreatedDeletedPairs(PxArray<BroadPhasePair>& createdPairs, PxArray<BroadPhasePair>& deletedPairs, const BitArray& updated, const BitArray& removed);

#ifdef ABP_MT
	void			addDelayedPair(PxArray<DelayedPair>& delayedPairs, const ABP_Index* mInToOut0, const ABP_Index* mInToOut1, PxU32 index0, PxU32 index1) const;
	void			addDelayedPairs(const PxArray<DelayedPair>& delayedPairs);
	void			addDelayedPairs2(PxArray<BroadPhasePair>& createdPairs, const PxArray<DelayedPair>& delayedPairs);
	void			resizeForNewPairs(PxU32 nbDelayedPairs);
#endif

	const Bp::FilterGroup::Enum*	mGroups;
	const ABP_Index*				mInToOut0;
	const ABP_Index*				mInToOut1;
	const bool*						mLUT;
};
///////////////////////////////////////////////////////////////////////////
// Data shared between the box managers: the per-object records, plus bitmaps flagging updated and removed objects.
struct ABP_SharedData
{
	PX_FORCE_INLINE ABP_SharedData() : mABP_Objects(NULL), mABP_Objects_Capacity(0)
	{
	}

	void resize(BpHandle userID);

	// Ensures the object array can be indexed with 'maxID', growing it when needed.
	PX_FORCE_INLINE void checkResize(PxU32 maxID)
	{
		const PxU32 required = maxID+1;
		if(required>mABP_Objects_Capacity)
			resize(maxID);
	}

	ABP_Object*	mABP_Objects;
	PxU32		mABP_Objects_Capacity;
	BitArray	mUpdatedObjects;	// Indexed by ABP_ObjectIndex
	BitArray	mRemovedObjects;	// Indexed by ABP_ObjectIndex
};
// Grows the object array so that 'userID' is a valid index. Capacity doubles (256 when empty), or jumps
// straight to userID+1 when doubling is not enough. Existing records are copied over with a raw memory copy.
void ABP_SharedData::resize(BpHandle userID)
{
	const PxU32 previousCapacity = mABP_Objects_Capacity;

	PxU32 grownCapacity = previousCapacity ? previousCapacity*2 : 256;
	if(grownCapacity<userID+1)
		grownCapacity = userID+1;

	ABP_Object* grown = PX_NEW(ABP_Object)[grownCapacity];
	if(mABP_Objects)
		PxMemCopy(grown, mABP_Objects, previousCapacity*sizeof(ABP_Object));

#if PX_DEBUG
	// Debug flag of the freshly added records starts cleared.
	for(PxU32 slot=previousCapacity; slot<grownCapacity; slot++)
		grown[slot].mUpdated = false;
#endif

	PX_DELETE_ARRAY(mABP_Objects);
	mABP_Objects_Capacity = grownCapacity;
	mABP_Objects = grown;
}
// Manages the boxes of one filter type, split into an "updated" set (new or moved objects) and a "sleeping" set.
// Each set has its box container and an in-to-out remap table from box slot to object handle.
class BoxManager
{
public:
	BoxManager(FilterType::Enum type);
	~BoxManager();

	void reset();

	// Stores references to the source bounds and distances arrays. They are not owned.
	void setSourceData(const PxBounds3* bounds, const float* distances)
	{
		mAABBManagerDistances = distances;
		mAABBManagerBounds = bounds;
	}

	void addObjects(const BpHandle* PX_RESTRICT userIDs, PxU32 nb, ABP_SharedData* PX_RESTRICT sharedData);
	void removeObject(ABPEntry& object, BpHandle userID);
	void updateObject(ABPEntry& object, BpHandle userID);
	void prepareData(RadixSortBuffered& rs, ABP_Object* PX_RESTRICT objects, PxU32 objectsCapacity, ABP_MM& memoryManager, PxU64 contextID);

	// PX_FORCE_INLINE PxU32 isThereWorkToDo() const { return mNbUpdated; }
	// True when there are updated boxes, or sleeping boxes flagged as removed.
	PX_FORCE_INLINE bool isThereWorkToDo() const // PT: temp & test, maybe we do that differently in the end
	{
		return mNbUpdated || mNbRemovedSleeping;
	}

	PX_FORCE_INLINE PxU32 getNbUpdatedBoxes() const
	{
		return mNbUpdated;
	}

	PX_FORCE_INLINE PxU32 getNbNonUpdatedBoxes() const
	{
		return mNbSleeping;
	}

	PX_FORCE_INLINE const DynamicBoxes& getUpdatedBoxes() const
	{
		return mUpdatedBoxes;
	}

	PX_FORCE_INLINE const DynamicBoxes& getSleepingBoxes() const
	{
		return mSleepingBoxes;
	}

	PX_FORCE_INLINE const ABP_Index* getRemap_Updated() const
	{
		return mInToOut_Updated;
	}

	PX_FORCE_INLINE const ABP_Index* getRemap_Sleeping() const
	{
		return mInToOut_Sleeping;
	}

#ifdef USE_ABP_BUCKETS
	PX_FORCE_INLINE const PxBounds3& getUpdatedBounds() const
	{
		return mUpdatedBounds;
	}
#endif

private:
	FilterType::Enum	mType;

	// PT: refs to source data (not owned). Currently separate arrays, ideally should be merged.
	const PxBounds3*	mAABBManagerBounds;
	const float*		mAABBManagerDistances;

	// New & updated objects
#ifdef USE_ABP_BUCKETS
	PxBounds3			mUpdatedBounds;		// Bounds around updated dynamic objects, computed in prepareData().
#endif
	ABP_Index*			mInToOut_Updated;	// Maps boxes to mABP_Objects
	PxU32				mNbUpdated;
	PxU32				mMaxNbUpdated;
	DynamicBoxes		mUpdatedBoxes;

	// Sleeping objects
	ABP_Index*			mInToOut_Sleeping;	// Maps boxes to mABP_Objects
	PxU32				mNbSleeping;
	DynamicBoxes		mSleepingBoxes;

	// Removed sleeping
	PxU32				mNbRemovedSleeping;

	void purgeRemovedFromSleeping(ABP_Object* PX_RESTRICT objects, PxU32 objectsCapacity);
};
// Creates an empty manager for the given filter type: no source data, no remap tables, all counters at zero.
BoxManager::BoxManager(FilterType::Enum type) :
	mType(type),
	mAABBManagerBounds(NULL),
	mAABBManagerDistances(NULL),
	mInToOut_Updated(NULL),
	mNbUpdated(0),
	mMaxNbUpdated(0),
	mInToOut_Sleeping(NULL),
	mNbSleeping(0),
	mNbRemovedSleeping(0)
{
}
// Releases the remap tables and box containers.
BoxManager::~BoxManager()
{
	BoxManager::reset();
}
void BoxManager::reset()
{
mMaxNbUpdated = mNbUpdated = mNbSleeping = 0;
PX_FREE(mInToOut_Updated);
PX_FREE(mInToOut_Sleeping);
mUpdatedBoxes.reset();
mSleepingBoxes.reset();
}
// Non-zero when the "new or updated" mark (the sign bit) is set in 'data'.
static PX_FORCE_INLINE PxU32 isNewOrUpdated(PxU32 data)
{
	const PxU32 mark = data & PX_SIGN_BITMASK;
	return mark;
}
// Returns 'data' with the "new or updated" mark (the sign bit) set.
static PX_FORCE_INLINE PxU32 markAsNewOrUpdated(PxU32 data)
{
	const PxU32 marked = data | PX_SIGN_BITMASK;
	return marked;
}
// Returns 'data' with the "new or updated" mark (the sign bit) cleared.
static PX_FORCE_INLINE PxU32 removeNewOrUpdatedMark(PxU32 data)
{
	const PxU32 unmarked = data & ~PX_SIGN_BITMASK;
	return unmarked;
}
// BpHandle = index in main/shared arrays like mAABBManagerBounds / mAABBManagerDistances
// User handles are stored directly in ABP_Index remap tables (see BoxManager::addObjects), so both types must have the same size.
PX_COMPILE_TIME_ASSERT(sizeof(BpHandle)==sizeof(ABP_Index));
void BoxManager::addObjects(const BpHandle* PX_RESTRICT userIDs, PxU32 nb, ABP_SharedData* PX_RESTRICT sharedData)
{
// PT: we're called for each batch.
// PT: TODO: fix the BpHandle/ABP_Index mix
const PxU32 currentSize = mNbUpdated;
const PxU32 currentCapacity = mMaxNbUpdated;
const PxU32 newSize = currentSize + nb;
ABP_Index* remap;
if(newSize>currentCapacity)
{
const PxU32 minCapacity = PxMax(newSize, 1024u);
const PxU32 newCapacity = PxMax(minCapacity, currentCapacity*2);
PX_ASSERT(newCapacity>=newSize);
mMaxNbUpdated = newCapacity;
remap = resizeMapping(currentSize, newCapacity, mInToOut_Updated);
}
else
{
remap = mInToOut_Updated;
}
mInToOut_Updated = remap;
mNbUpdated = newSize;
// PT: we only copy the new handles for now. The bounds will be computed later in "prepareData".
// PT: TODO: do we even need to copy them? Can't we just reuse the source ptr directly?
{
PX_ASSERT(currentSize+nb<=mMaxNbUpdated);
remap += currentSize;
PxU32 nbToGo = nb;
while(nbToGo--)
{
const BpHandle userID = *userIDs++;
PX_ASSERT(!isNewOrUpdated(userID));
*remap++ = markAsNewOrUpdated(userID);
if(sharedData)
sharedData->mUpdatedObjects.setBit(userID);
}
}
}
// PT: TODO: inline this again
// Flags the box of 'object' as removed by writing INVALID_ID into the matching remap table entry.
// Removed sleeping boxes are also counted so that they can be purged later.
void BoxManager::removeObject(ABPEntry& object, BpHandle userID)
{
	PX_UNUSED(userID);

	// Low bit of the object data = sleeping flag, remaining bits = box slot.
	const PxU32 packed = object.getData();
	const PxU32 slot = packed>>1;
	const bool isSleeping = (packed&1)!=0;

	if(!isSleeping)
	{
		// PT: remove active object, i.e. one that was previously in "updated" arrays.
		PX_ASSERT(slot<mNbUpdated);
		PX_ASSERT(slot<mMaxNbUpdated);
		PX_ASSERT(mInToOut_Updated[slot]==userID);
		PX_ASSERT(mInToOut_Updated[slot] != INVALID_ID);
		// PT: TODO: do we need this at all? We could use 'userID' to access the removed bitmap...
		mInToOut_Updated[slot] = INVALID_ID;
		return;
	}

	// Sleeping object.
	PX_ASSERT(slot<mNbSleeping);
	PX_ASSERT(mInToOut_Sleeping[slot]==userID);
	PX_ASSERT(mInToOut_Sleeping[slot] != INVALID_ID); // PT: can that happen if we update and remove an object in the same frame or does the AABB take care of it?
	mInToOut_Sleeping[slot] = INVALID_ID;
	mNbRemovedSleeping++;
	PX_ASSERT(mNbRemovedSleeping<=mNbSleeping);
}
// PT: TODO: inline this again
// Marks 'object' as updated. An active object just gets the new-or-updated mark on its remap entry.
// A sleeping object is reactivated: its sleeping entry is invalidated and its handle is appended to the updated set.
void BoxManager::updateObject(ABPEntry& object, BpHandle userID)
{
	PX_UNUSED(userID);

	// Low bit of the object data = sleeping flag, remaining bits = box slot.
	const PxU32 packed = object.getData();
	const PxU32 slot = packed>>1;
	const bool isSleeping = (packed&1)!=0;

	if(!isSleeping)
	{
		// Active object, i.e. it was updated in previous frame and it's already in mInToOut_Updated array
		PX_ASSERT(slot<mNbUpdated);
		PX_ASSERT(slot<mMaxNbUpdated);
		PX_ASSERT(mInToOut_Updated[slot]==userID);
		const ABP_Index current = mInToOut_Updated[slot];
		mInToOut_Updated[slot] = markAsNewOrUpdated(current);
		return;
	}

	// PT: benchmark for this codepath: MBP.UpdateSleeping
	// Step 1: take the object out of the sleeping array.
	PX_ASSERT(slot<mNbSleeping);
	PX_ASSERT(mInToOut_Sleeping[slot]==userID);
	PX_ASSERT(mInToOut_Sleeping[slot] != INVALID_ID);
	mInToOut_Sleeping[slot] = INVALID_ID;
	mNbRemovedSleeping++;
	PX_ASSERT(mNbRemovedSleeping<=mNbSleeping);

	// Step 2: append it to the active/updated array.
	// PT: TODO: revisit / improve this maybe
	addObjects(&userID, 1, NULL); // Don't pass sharedData because the bitmap has already been updated by the calling code
}
#if PX_DEBUG
// Debug-only helper: builds 'aabb' from the source bounds at 'index', inflated on every axis by the
// contact distance stored at the same index.
static PX_FORCE_INLINE void computeMBPBounds_Check(SIMD_AABB4& aabb, const PxBounds3* PX_RESTRICT boundsXYZ, const PxReal* PX_RESTRICT contactDistances, const BpHandle index)
{
	const PxBounds3& source = boundsXYZ[index];
	const Vec4V inflationV = V4Load(contactDistances[index]);

	PX_ALIGN(16, PxVec4) inflatedMin;
	PX_ALIGN(16, PxVec4) inflatedMax;
	V4StoreA(V4Sub(V4LoadU(&source.minimum.x), inflationV), &inflatedMin.x);
	// PT: this one is safe because we allocated one more box in the array (in BoundsArray::initEntry)
	V4StoreA(V4Add(V4LoadU(&source.maximum.x), inflationV), &inflatedMax.x);

	aabb.mMinX = inflatedMin.x;
	aabb.mMinY = inflatedMin.y;
	aabb.mMinZ = inflatedMin.z;
	aabb.mMaxX = inflatedMax.x;
	aabb.mMaxY = inflatedMax.y;
	aabb.mMaxZ = inflatedMax.z;
}
#endif
// Writes NB_SENTINELS sentinel records right after the first 'size' entries of 'boxesX'.
// The array must have room for them.
static PX_FORCE_INLINE void initSentinels(SIMD_AABB_X4* PX_RESTRICT boxesX, const PxU32 size)
{
	SIMD_AABB_X4* sentinel = boxesX + size;
	PxU32 remaining = NB_SENTINELS;
	while(remaining--)
	{
		sentinel->initSentinel();
		sentinel++;
	}
}
// Compacts the sleeping arrays (mSleepingBoxes + mInToOut_Sleeping) by dropping every entry whose remap
// value is INVALID_ID, preserving the relative order of the surviving entries.
// Three cases are handled:
// - every sleeping entry was removed: the sleeping buffers are released and the counters reset
// - survivors fill at least half of the current capacity: holes are removed in place
// - otherwise: survivors are copied to freshly allocated, tightly sized buffers
// For each survivor, objects[id].setSleepingIndex() is called with its new position.
// \param	objects			[in/out] object array indexed by the IDs stored in mInToOut_Sleeping
// \param	objectsCapacity	[in] only used by assertions to bound-check those IDs
void BoxManager::purgeRemovedFromSleeping(ABP_Object* PX_RESTRICT objects, PxU32 objectsCapacity)
{
CHECKPOINT("purgeRemovedFromSleeping\n");
PX_UNUSED(objectsCapacity);
PX_ASSERT(mNbRemovedSleeping);
PX_ASSERT(mNbSleeping);
// PT: TODO: do we need to allocate separate buffers here?
// PT: we reach this codepath when:
// - no object has been added or updated
// - sleeping objects have been removed
// So we have to purge the removed objects from the sleeping array. We cannot entirely ignore the removals since we compute collisions
// between sleeping arrays and active arrays for bipartite cases. So we either have to remove the invalid entries immediately, or make
// sure they don't report collisions. We could ignore collisions when the remapped ID is "INVALID_ID" but that would be an additional
// test for each potential pair, i.e. it's a constant cost. We cannot tweak the removed bounding boxes (e.g. mark them as empty) because
// they are sorted, and the tweak would break the sorting and the collision loop. Keeping all removed objects in the array also means
// there is more data to parse all the time, i.e. there is a performance cost again. So for now we just remove all deleted entries here.
// ==> also tweaking the sleeping boxes might break the "merge sleeping" array code
PX_ASSERT(mNbRemovedSleeping<=mNbSleeping);
if(mNbRemovedSleeping==mNbSleeping)
{
// PT: remove everything
mSleepingBoxes.reset();
PX_FREE(mInToOut_Sleeping);
mNbSleeping = mNbRemovedSleeping = 0;
return;
}
// Number of entries expected to survive the purge
const PxU32 expectedTotal = mNbSleeping - mNbRemovedSleeping;
PxU32 nbRemovedFound = 0;	// only consumed by the assertions below
PxU32 nbSleepingLeft = 0;	// write cursor = number of survivors copied so far
const PxU32 sleepCapacity = mSleepingBoxes.getCapacity();
// Keep the current buffers as long as they stay at least half full, otherwise shrink to fit
if(expectedTotal>=sleepCapacity/2)
{
// PT: remove holes, keep same data buffers
SIMD_AABB_X4* boxesX = mSleepingBoxes.getBoxes_X();
SIMD_AABB_YZ4* boxesYZ = mSleepingBoxes.getBoxes_YZ();
ABP_Index* remap = mInToOut_Sleeping;
for(PxU32 i=0;i<mNbSleeping;i++)
{
const PxU32 boxIndex = remap[i];
if(boxIndex==INVALID_ID)
{
nbRemovedFound++;
}
else
{
PX_ASSERT(nbSleepingLeft<expectedTotal);
// The write cursor never gets ahead of the read cursor, so copying down in place is safe.
// The copy is skipped while no hole has been encountered yet.
if(i!=nbSleepingLeft)
{
remap[nbSleepingLeft] = boxIndex;
boxesX[nbSleepingLeft] = boxesX[i];
boxesYZ[nbSleepingLeft] = boxesYZ[i];
}
{
PX_ASSERT(boxIndex<objectsCapacity);
objects[boxIndex].setSleepingIndex(nbSleepingLeft, mType);
}
nbSleepingLeft++;
}
}
PX_ASSERT(nbSleepingLeft==expectedTotal);
PX_ASSERT(nbSleepingLeft+nbRemovedFound==mNbSleeping);
PX_UNUSED(nbRemovedFound);
// The array got shorter, so the sentinels must be rewritten right after the last surviving box
initSentinels(boxesX, expectedTotal);
mSleepingBoxes.mSize = expectedTotal;
}
else
{
// PT: remove holes, get fresh memory buffers
SIMD_AABB_X4* dstBoxesX = reinterpret_cast<SIMD_AABB_X4*>(MBP_ALLOC(sizeof(SIMD_AABB_X4)*(expectedTotal+NB_SENTINELS)));
SIMD_AABB_YZ4* dstBoxesYZ = reinterpret_cast<SIMD_AABB_YZ4*>(MBP_ALLOC(sizeof(SIMD_AABB_YZ4)*(expectedTotal+NB_SENTINELS)));
initSentinels(dstBoxesX, expectedTotal);
BpHandle* PX_RESTRICT dstRemap = reinterpret_cast<BpHandle*>(PX_ALLOC(expectedTotal*sizeof(BpHandle), "tmp"));
const SIMD_AABB_X4* PX_RESTRICT srcDataX = mSleepingBoxes.getBoxes_X();
const SIMD_AABB_YZ4* PX_RESTRICT srcDataYZ = mSleepingBoxes.getBoxes_YZ();
const ABP_Index* PX_RESTRICT srcRemap = mInToOut_Sleeping;
for(PxU32 i=0;i<mNbSleeping;i++)
{
const PxU32 boxIndex = srcRemap[i];
if(boxIndex==INVALID_ID)
{
nbRemovedFound++;
}
else
{
PX_ASSERT(nbSleepingLeft<expectedTotal);
dstRemap[nbSleepingLeft] = boxIndex;
dstBoxesX[nbSleepingLeft] = srcDataX[i];
dstBoxesYZ[nbSleepingLeft] = srcDataYZ[i];
{
PX_ASSERT(boxIndex<objectsCapacity);
objects[boxIndex].setSleepingIndex(nbSleepingLeft, mType);
}
nbSleepingLeft++;
}
}
PX_ASSERT(nbSleepingLeft==expectedTotal);
PX_ASSERT(nbSleepingLeft+nbRemovedFound==mNbSleeping);
// PT: TODO: double check all this
// NOTE(review): init() is presumably responsible for releasing the previous box buffers - confirm in SplitBoxes::init
mSleepingBoxes.init(expectedTotal, expectedTotal, dstBoxesX, dstBoxesYZ);
PX_FREE(mInToOut_Sleeping);
mInToOut_Sleeping = dstRemap;
}
mNbSleeping = expectedTotal;
mNbRemovedSleeping = 0;
}
// Returns the min-X value of the next entry to merge from the indirectly-addressed list, i.e.
// sortedDataX[sleepingIndices[offsetSorted]].mMinX, or SentinelValue2 once offsetSorted has reached nbSorted.
static PX_FORCE_INLINE PosXType2 getNextCandidateSorted(PxU32 offsetSorted, const PxU32 nbSorted, const SIMD_AABB_X4* PX_RESTRICT sortedDataX, const PxU32* PX_RESTRICT sleepingIndices)
{
	if(offsetSorted>=nbSorted)
		return SentinelValue2;	// list exhausted

	const PxU32 boxSlot = sleepingIndices[offsetSorted];
	return sortedDataX[boxSlot].mMinX;
}
// Returns the min-X value of the next entry to merge from the directly-addressed list, i.e.
// toSortDataX[offsetNonSorted].mMinX, or SentinelValue2 once offsetNonSorted has reached nbToSort.
static PX_FORCE_INLINE PosXType2 getNextCandidateNonSorted(PxU32 offsetNonSorted, const PxU32 nbToSort, const SIMD_AABB_X4* PX_RESTRICT toSortDataX)
{
	if(offsetNonSorted>=nbToSort)
		return SentinelValue2;	// list exhausted

	return toSortDataX[offsetNonSorted].mMinX;
}
PX_COMPILE_TIME_ASSERT(sizeof(BpHandle)==sizeof(float));
// Rebuilds the "updated" and "sleeping" box arrays of this manager from the current content of mInToOut_Updated.
// The work is done in three steps:
// 1) classify each entry of mInToOut_Updated as removed / new-or-updated / now-sleeping
// 2) move the now-sleeping entries into the sleeping arrays (merging with the existing ones if any),
//    or purge removed entries from the sleeping arrays when nothing new went to sleep
// 3) sort the new-or-updated entries by their inflated min-X key and recreate the updated arrays from them
// \param	rs				[in] unused: the parameter name is commented out. The 'rs' used in the body is
//							presumably declared by the StackRadixSort macro - confirm against its definition
// \param	objects			[in/out] object array; sleeping/active indices are written back into it
// \param	objectsCapacity	[in] only used by assertions to bound-check object IDs
// \param	memoryManager	[in] provides the frame allocations for the temporary buffers
// \param	contextID		[in] profiler context
void BoxManager::prepareData(RadixSortBuffered& /*rs*/, ABP_Object* PX_RESTRICT objects, PxU32 objectsCapacity, ABP_MM& memoryManager, PxU64 contextID)
{
PX_UNUSED(contextID);
// PT: mNbUpdated = number of objects in the updated buffer, could have been updated this frame or previous frame
const PxU32 size = mNbUpdated;
if(!size)
{
// Nothing in the updated buffer: the only possible work is purging removed sleeping objects
if(mNbRemovedSleeping)
{
// PT: benchmark for this codepath: MBP.RemoveHalfSleeping
purgeRemovedFromSleeping(objects, objectsCapacity);
}
return;
}
PX_ASSERT(mAABBManagerBounds);
PX_ASSERT(mAABBManagerDistances);
PX_ASSERT(mInToOut_Updated);
// Prepare new/updated objects
const ABP_Index* PX_RESTRICT remap = mInToOut_Updated;
const PxBounds3* PX_RESTRICT bounds = mAABBManagerBounds;
const float* PX_RESTRICT distances = mAABBManagerDistances;
// Sort keys (inflated min-X) of the new/updated objects, allocated lazily in the loop below
float* PX_RESTRICT keys = NULL;
// newOrUpdatedIDs: *userIDs* of objects that have been added or updated this frame.
// sleepingIndices: *indices* (not userIDs) of non-updated objects within mInToOut_Updated
// Both views alias the same lazily-allocated buffer of 'size' entries: sleeping indices are written from the
// front while new/updated IDs are written from the back. They cannot collide since nbUpdated+nbSleeping<=size.
PxU32* tempBuffer = NULL;
PxU32* newOrUpdatedIDs = tempBuffer;
PxU32* sleepingIndices = tempBuffer;
// PT: mNbUpdated / mInToOut_Updated contains:
// 1) objects added this frame (from addObject(s))
// 2) objects updated this frame (from updateObject(s))
// 3) objects updated the frame before, not updated this frame, i.e. they are now "sleeping"
// 4) objects updated the frame before, then removed (from removeObject(s))
//
// We split the current array into separate groups:
// - 1) & 2) go to "temp", count is "nbUpdated"
// - 3) go to "temp2", count is "nbSleeping"
// - 4) are filtered out. No special processing is needed because the updated data is always parsed/recreated here anyway.
// So if we don't actively add removed objects to the new buffers, they get removed as a side-effect.
PxU32 nbUpdated = 0; // PT: number of objects updated this frame
PxU32 nbSleeping = 0;
PxU32 nbRemoved = 0; // PT: number of removed objects that were previously located in the updated array
// PT: TODO: could we do the work within mInToOut_Updated?
// - updated objects have invalidated bounds so we don't need to preserve their order
// - we need to preserve the order of sleeping objects to avoid re-sorting them
// - we cannot use MTF since it breaks the order
// - parse backward and move sleeping objects to the back? but then we might have to move the sleeping boxes at the same time
for(PxU32 i=0;i<size;i++)
{
PX_ASSERT(i<mMaxNbUpdated);
const PxU32 index = remap[i];
if(index==INVALID_ID)
{
nbRemoved++;
}
else
{
// Allocate the shared temp buffer on first use, so nothing is allocated when every entry was removed
if(!tempBuffer)
{
tempBuffer = reinterpret_cast<PxU32*>(memoryManager.frameAlloc(size*sizeof(PxU32)));
newOrUpdatedIDs = tempBuffer;
sleepingIndices = tempBuffer;
}
if(isNewOrUpdated(index))
{
// PT: new or updated object
if(!keys)
keys = reinterpret_cast<float*>(PX_ALLOC(size*sizeof(float), "tmp"));
// PT: in this version we compute the key on-the-fly, i.e. it will be computed twice overall. We could make this
// faster by merging bounds and distances inside the AABB manager.
const BpHandle userID = removeNewOrUpdatedMark(index);
keys[nbUpdated] = bounds[userID].minimum.x - distances[userID];
// Stored from the back of the shared buffer; read back with the same "size - 1 - k" formula after sorting
newOrUpdatedIDs[size - 1 - nbUpdated] = userID;
#if PX_DEBUG
SIMD_AABB4 aabb;
computeMBPBounds_Check(aabb, bounds, distances, userID);
PX_ASSERT(aabb.mMinX==keys[nbUpdated]);
#endif
nbUpdated++;
}
else
{
// PT: sleeping object
sleepingIndices[nbSleeping++] = i;
}
}
}
PX_ASSERT(nbRemoved + nbUpdated + nbSleeping == size);
PX_UNUSED(nbRemoved);
// PT: we must process the sleeping objects first, because the bounds of new sleeping objects are located in the existing updated buffers.
// PT: TODO: *HOWEVER* we could sort things right now and then reuse the "keys" buffer?
if(nbSleeping)
{
// PT: must merge these guys to current sleeping array
// They should already be in sorted order and we should already have the boxes.
#if PX_ENABLE_ASSERTS
// Validation only: checks that the now-sleeping entries are sorted by key and that their stored boxes
// still match the bounds recomputed from the AABB manager data
const SIMD_AABB_YZ4* boxesYZ = mUpdatedBoxes.getBoxes_YZ();
float prevKey = -FLT_MAX;
for(PxU32 ii=0;ii<nbSleeping;ii++)
{
const PxU32 i = sleepingIndices[ii]; // PT: TODO: remove this indirection
const PxU32 index = remap[i];
PX_ASSERT(index!=INVALID_ID);
PX_ASSERT(!(index & PX_SIGN_BITMASK));
const BpHandle userID = index;
const float key = bounds[userID].minimum.x - distances[userID];
PX_ASSERT(key>=prevKey);
prevKey = key;
SIMD_AABB4 aabb;
computeMBPBounds_Check(aabb, bounds, distances, userID);
PX_ASSERT(aabb.mMinX==key);
#ifdef ABP_SIMD_OVERLAP
// With ABP_SIMD_OVERLAP the stored min Y/Z values are expected to be negated
PX_ASSERT(boxesYZ[i].mMinY==-aabb.mMinY);
PX_ASSERT(boxesYZ[i].mMinZ==-aabb.mMinZ);
#else
PX_ASSERT(boxesYZ[i].mMinY==aabb.mMinY);
PX_ASSERT(boxesYZ[i].mMinZ==aabb.mMinZ);
#endif
PX_ASSERT(boxesYZ[i].mMaxY==aabb.mMaxY);
PX_ASSERT(boxesYZ[i].mMaxZ==aabb.mMaxZ);
}
#endif
if(mNbSleeping)
{
// PT: benchmark for this codepath: MBP.MergeSleeping
CHECKPOINT("Merging sleeping objects\n");
// PT: here, we need to merge two arrays of sleeping objects together:
// - the ones already contained inside mSleepingBoxes
// - the new sleeping objects currently contained in mUpdatedBoxes
// Both of them should already be sorted.
// PT: TODO: super subtle stuff going on there, to revisit
// PT: TODO: revisit names
// Naming used below: "sorted*" = newly sleeping entries read from mUpdatedBoxes through sleepingIndices,
// "toSort*" / "NonSorted" = entries already stored in mSleepingBoxes (may contain removed entries)
PxU32 offsetSorted = 0;
const PxU32 nbSorted = nbSleeping;
const SIMD_AABB_X4* PX_RESTRICT sortedDataX = mUpdatedBoxes.getBoxes_X();
const SIMD_AABB_YZ4* PX_RESTRICT sortedDataYZ = mUpdatedBoxes.getBoxes_YZ();
const ABP_Index* PX_RESTRICT sortedRemap = mInToOut_Updated;
PxU32 offsetNonSorted = 0;
const PxU32 nbToSort = mNbSleeping;
const SIMD_AABB_X4* PX_RESTRICT toSortDataX = mSleepingBoxes.getBoxes_X();
const SIMD_AABB_YZ4* PX_RESTRICT toSortDataYZ = mSleepingBoxes.getBoxes_YZ();
const ABP_Index* PX_RESTRICT toSortRemap = mInToOut_Sleeping;
PX_ASSERT(mNbRemovedSleeping<=mNbSleeping);
#if PX_ENABLE_ASSERTS
{
// Validation only: the removal counter must match the number of invalidated entries
PxU32 nbRemovedFound=0;
for(PxU32 i=0;i<mNbSleeping;i++)
{
if(toSortRemap[i]==INVALID_ID)
nbRemovedFound++;
}
PX_ASSERT(nbRemovedFound==mNbRemovedSleeping);
}
#endif
PosXType2 nextCandidateNonSorted = getNextCandidateNonSorted(offsetNonSorted, nbToSort, toSortDataX);
PosXType2 nextCandidateSorted = getNextCandidateSorted(offsetSorted, nbSorted, sortedDataX, sleepingIndices);
// Removed sleeping entries are dropped during the merge, so they are not counted in the output size
const PxU32 nbTotal = nbSorted + nbToSort - mNbRemovedSleeping;
SIMD_AABB_X4* dstBoxesX = reinterpret_cast<SIMD_AABB_X4*>(MBP_ALLOC(sizeof(SIMD_AABB_X4)*(nbTotal+NB_SENTINELS)));
SIMD_AABB_YZ4* dstBoxesYZ = reinterpret_cast<SIMD_AABB_YZ4*>(MBP_ALLOC(sizeof(SIMD_AABB_YZ4)*(nbTotal+NB_SENTINELS)));
initSentinels(dstBoxesX, nbTotal);
BpHandle* PX_RESTRICT dstRemap = reinterpret_cast<BpHandle*>(PX_ALLOC(nbTotal*sizeof(BpHandle), "tmp"));
PxU32 i=0;	// write cursor in the merged arrays
// One iteration per input entry (removed entries included): each iteration consumes exactly one
// entry from one of the two lists. On equal keys the newly sleeping entry is taken first.
PxU32 nbToGo = nbSorted + nbToSort;
while(nbToGo--)
{
PxU32 boxIndex;
{
if(nextCandidateNonSorted<nextCandidateSorted)
{
boxIndex = toSortRemap[offsetNonSorted];
// Removed entries are consumed but not copied
if(boxIndex!=INVALID_ID)
{
dstRemap[i] = boxIndex;
dstBoxesX[i] = toSortDataX[offsetNonSorted];
dstBoxesYZ[i] = toSortDataYZ[offsetNonSorted];
}
offsetNonSorted++;
nextCandidateNonSorted = getNextCandidateNonSorted(offsetNonSorted, nbToSort, toSortDataX);
}
else
{
const PxU32 j = sleepingIndices[offsetSorted];
PX_ASSERT(j<size);
boxIndex = sortedRemap[j];
PX_ASSERT(boxIndex!=INVALID_ID);
dstRemap[i] = boxIndex;
dstBoxesX[i] = sortedDataX[j];
dstBoxesYZ[i] = sortedDataYZ[j];
offsetSorted++;
nextCandidateSorted = getNextCandidateSorted(offsetSorted, nbSorted, sortedDataX, sleepingIndices);
}
}
// The write cursor only advances for valid entries
if(boxIndex!=INVALID_ID)
{
PX_ASSERT(boxIndex<objectsCapacity);
objects[boxIndex].setSleepingIndex(i, mType);
i++;
}
}
PX_ASSERT(i==nbTotal);
PX_ASSERT(offsetSorted+offsetNonSorted==nbSorted+nbToSort);
#if PX_DEBUG
{
// Validation only: the merged array must be sorted by min-X
PosXType2 prevSorted = dstBoxesX[0].mMinX;
for(PxU32 i2=1;i2<nbTotal;i2++)
{
PosXType2 v = dstBoxesX[i2].mMinX;
PX_ASSERT(prevSorted<=v);
prevSorted = v;
}
}
#endif
// PT: TODO: double check all this
// NOTE(review): init() is presumably responsible for releasing the previous box buffers - confirm in SplitBoxes::init
mSleepingBoxes.init(nbTotal, nbTotal, dstBoxesX, dstBoxesYZ);
PX_FREE(mInToOut_Sleeping);
mInToOut_Sleeping = dstRemap;
mNbSleeping = nbTotal;
mNbRemovedSleeping = 0;
}
else
{
// PT: benchmark for this codepath: MBP.ActiveToSleeping
CHECKPOINT("Active objects become sleeping objects\n");
// PT: TODO: optimize allocs
BpHandle* inToOut_Sleeping;
// A new remap table is allocated only when allocate() returns true, otherwise the existing one is reused.
// NOTE(review): allocate() presumably returns true when it had to (re)allocate the box buffers - confirm
if(mSleepingBoxes.allocate(nbSleeping))
{
inToOut_Sleeping = reinterpret_cast<BpHandle*>(PX_ALLOC(nbSleeping*sizeof(BpHandle), "tmp"));
PX_FREE(mInToOut_Sleeping);
mInToOut_Sleeping = inToOut_Sleeping;
}
else
{
inToOut_Sleeping = mInToOut_Sleeping;
}
const SIMD_AABB_X4* srcBoxesX = mUpdatedBoxes.getBoxes_X();
const SIMD_AABB_YZ4* srcBoxesYZ = mUpdatedBoxes.getBoxes_YZ();
SIMD_AABB_X4* dstBoxesX = mSleepingBoxes.getBoxes_X();
SIMD_AABB_YZ4* dstBoxesYZ = mSleepingBoxes.getBoxes_YZ();
initSentinels(dstBoxesX, nbSleeping);
// Straight copy: there is nothing to merge with, and the source order is kept
for(PxU32 ii=0;ii<nbSleeping;ii++)
{
const PxU32 i = sleepingIndices[ii]; // PT: TODO: remove this indirection
const PxU32 index = remap[i];
PX_ASSERT(index!=INVALID_ID);
inToOut_Sleeping[ii] = index;
dstBoxesX[ii] = srcBoxesX[i];
dstBoxesYZ[ii] = srcBoxesYZ[i];
{
PX_ASSERT(index<objectsCapacity);
objects[index].setSleepingIndex(ii, mType);
}
}
mNbSleeping = nbSleeping;
}
}
else
{
// PT: no sleeping objects in updated buffer
if(mNbSleeping)
{
if(mNbRemovedSleeping)
{
// PT: benchmark for this codepath: MBP.UpdateSleeping
purgeRemovedFromSleeping(objects, objectsCapacity);
}
}
else
{
PX_ASSERT(!mNbRemovedSleeping);
}
}
if(nbUpdated)
{
// PT: benchmark for this codepath: MBP.Update64KObjects
CHECKPOINT("Create updated objects\n");
// PT: we need to sort here because we reuse the "keys" buffer just afterwards
PxU32* ranks0 = reinterpret_cast<PxU32*>(memoryManager.frameAlloc(sizeof(PxU32)*nbUpdated));
PxU32* ranks1 = reinterpret_cast<PxU32*>(memoryManager.frameAlloc(sizeof(PxU32)*nbUpdated));
StackRadixSort(rs, ranks0, ranks1);
const PxU32* sorted;
{
PX_PROFILE_ZONE("Sort", contextID);
sorted = rs.Sort(keys, nbUpdated).GetRanks();
}
// PT:
// - shuffle the remap table, store it in sorted order (we can probably use the "recyclable" array here again)
// - compute bounds on-the-fly, store them in sorted order
// PT: TODO: the "keys" array can be much bigger than strictly necessary here
BpHandle* inToOut_Updated_Sorted;
// When allocate() returns true the "keys" buffer is recycled as the new remap table (BpHandle and float
// have the same size, see the compile-time assert above this function). Otherwise the existing remap
// table is overwritten in place and "keys" is released.
if(mUpdatedBoxes.allocate(nbUpdated))
{
inToOut_Updated_Sorted = reinterpret_cast<BpHandle*>(keys);
PX_FREE(mInToOut_Updated);
mInToOut_Updated = inToOut_Updated_Sorted;
}
else
{
PX_FREE(keys);
inToOut_Updated_Sorted = mInToOut_Updated;
}
SIMD_AABB_X4* PX_RESTRICT dstBoxesX = mUpdatedBoxes.getBoxes_X();
initSentinels(dstBoxesX, nbUpdated);
#ifdef USE_ABP_BUCKETS
// Running min/max over all inflated boxes, stored into mUpdatedBounds after the loop
Vec4V minV = V4Load(FLT_MAX);
Vec4V maxV = V4Load(-FLT_MAX);
#endif
// PT: TODO: parallel? Everything indexed by i should be fine, things indexed by userID might have some false sharing
for(PxU32 i=0;i<nbUpdated;i++)
{
const PxU32 sortedIndex = *sorted++;
// Mirrors the back-to-front storage used by the classification loop
const BpHandle userID = newOrUpdatedIDs[size - 1 - sortedIndex];
PX_ASSERT(i<size);
inToOut_Updated_Sorted[i] = userID;
{
PX_ASSERT(userID<objectsCapacity);
objects[userID].setActiveIndex(i, mType);
#if PX_DEBUG
objects[userID].mUpdated = false;
#endif
}
// PT: TODO: refactor with computeMBPBounds?
{
const PxBounds3& b = bounds[userID];
const Vec4V contactDistanceV = V4Load(distances[userID]);
const Vec4V inflatedMinV = V4Sub(V4LoadU(&b.minimum.x), contactDistanceV);
const Vec4V inflatedMaxV = V4Add(V4LoadU(&b.maximum.x), contactDistanceV); // PT: this one is safe because we allocated one more box in the array (in BoundsArray::initEntry)
#ifdef USE_ABP_BUCKETS
minV = V4Min(minV, inflatedMinV);
maxV = V4Max(maxV, inflatedMaxV);
#endif
// PT: TODO better
PX_ALIGN(16, PxVec4) boxMin;
PX_ALIGN(16, PxVec4) boxMax;
V4StoreA(inflatedMinV, &boxMin.x);
V4StoreA(inflatedMaxV, &boxMax.x);
mUpdatedBoxes.setBounds(i, boxMin, boxMax);
}
}
#ifdef USE_ABP_BUCKETS
StoreBounds(mUpdatedBounds, minV, maxV)
#endif
#ifndef TEST_PERSISTENT_MEMORY
// Frame allocations are released in reverse order of allocation
memoryManager.frameFree(ranks1);
memoryManager.frameFree(ranks0);
#endif
}
else
{
// PT: benchmark for this codepath: MBP.MergeSleeping / MBP.Remove64KObjects
CHECKPOINT("Free updated objects\n");
// "keys" is only allocated for new/updated objects, so it is still NULL on this path
PX_FREE(keys);
mUpdatedBoxes.reset();
PX_FREE(mInToOut_Updated);
}
mNbUpdated = mMaxNbUpdated = nbUpdated;
if(tempBuffer)
memoryManager.frameFree(tempBuffer);
}
#ifdef ABP_MT
namespace
{
// Per-task pair output used by the multi-threaded codepaths: overlaps are forwarded to the shared pair
// manager's addDelayedPair(), which appends new pairs to mDelayedPairs instead of inserting them directly.
struct PairManagerMT
{
const ABP_PairManager* mSharedPM;	// shared pair manager, only accessed through a const pointer here
PxArray<DelayedPair> mDelayedPairs;	// new pairs recorded by this task
const ABP_Index* mInToOut0;	// remap table for the first box index passed to outputPair()
const ABP_Index* mInToOut1;	// remap table for the second box index passed to outputPair()
//char mBuffer[256];
};
}
// Multi-threaded flavor of outputPair: forwards the overlapping pair (index0, index1) to the shared pair
// manager's addDelayedPair(), together with this task's delayed-pairs array and remap tables.
static PX_FORCE_INLINE void outputPair(PairManagerMT& pairManager, PxU32 index0, PxU32 index1)
{
	const ABP_PairManager* sharedPM = pairManager.mSharedPM;
	sharedPM->addDelayedPair(	pairManager.mDelayedPairs,
								pairManager.mInToOut0, pairManager.mInToOut1,
								index0, index1);
}
#endif
#ifdef ABP_MT2
// Size of the ABP::mBipTasks array
#define NB_BIP_TASKS 15
// Identifier stored in each ABP_InternalTask (see ABP::mTask0 / ABP::mTask1)
enum ABP_TaskID
{
ABP_TASK_0,
ABP_TASK_1,
};
// High-priority task identified by an ABP_TaskID. run() is defined elsewhere in this file.
class ABP_InternalTask : public PxLightCpuTask
{
public:
// mBP starts as NULL and has to be assigned before the task is used
ABP_InternalTask(ABP_TaskID id) : mBP(NULL), mID(id) {}
virtual const char* getName() const PX_OVERRIDE
{
return "ABP_InternalTask";
}
virtual void run() PX_OVERRIDE;
virtual bool isHighPriority() const PX_OVERRIDE { return true; }
BroadPhaseABP* mBP;	// owning broadphase, NULL until assigned
ABP_TaskID mID;	// which internal task this instance is
};
class ABP_CompleteBoxPruningStartTask;
// High-priority box-pruning task. It holds one box list (mCounter / mBoxListX / mBoxListYZ / mRemap), an
// optional second one (the "4" members) and its own PairManagerMT output. run() is defined elsewhere in this file.
// NOTE(review): the constructor only initializes mStartTask, mType and mID. All other members are left
// uninitialized and must be assigned before run() or isThereWorkToDo() is called - confirm at the setup sites.
class ABP_CompleteBoxPruningTask : public PxLightCpuTask
{
public:
ABP_CompleteBoxPruningTask() :
mStartTask(NULL),
mType(0),
mID(0)
{
}
virtual const char* getName() const PX_OVERRIDE
{
return "ABP_CompleteBoxPruningTask";
}
virtual void run() PX_OVERRIDE;
virtual bool isHighPriority() const PX_OVERRIDE { return true; }
ABP_CompleteBoxPruningStartTask* mStartTask;
PxU16 mType;	// non-zero: the second ("4") list is required as well, see isThereWorkToDo()
PxU16 mID;
// First box list
PxU32 mCounter;
const SIMD_AABB_X4* mBoxListX;
const SIMD_AABB_YZ4* mBoxListYZ;
const PxU32* mRemap;
// Second box list, only consulted by isThereWorkToDo() when mType is non-zero
PxU32 mCounter4;
const SIMD_AABB_X4* mBoxListX4;
const SIMD_AABB_YZ4* mBoxListYZ4;
const PxU32* mRemap4;
PairManagerMT mPairs;
// Returns false when the first list is empty, or when mType is non-zero and the second list is empty.
PX_FORCE_INLINE bool isThereWorkToDo() const
{
if(!mCounter)
return false;
if(mType)
return mCounter4!=0;
return true;
}
};
// High-priority task holding a back-pointer to its ABP_CompleteBoxPruningStartTask (which owns an instance
// of this class as mEndTask). run() is defined elsewhere in this file.
class ABP_CompleteBoxPruningEndTask : public PxLightCpuTask
{
public:
ABP_CompleteBoxPruningEndTask() : mStartTask(NULL) {}
virtual const char* getName() const PX_OVERRIDE
{
return "ABP_CompleteBoxPruningEndTask";
}
virtual void run() PX_OVERRIDE;
virtual bool isHighPriority() const PX_OVERRIDE { return true; }
ABP_CompleteBoxPruningStartTask* mStartTask;	// NULL until assigned
};
// High-priority task that owns nine ABP_CompleteBoxPruningTask instances and one end task, together with
// per-bucket box lists (NB_BUCKETS entries each). The constructor, setup(), run() and the addDelayedPairs
// functions are defined elsewhere in this file.
// NOTE(review): presumably this task distributes the input boxes into buckets and spawns mTasks - confirm in run()
class ABP_CompleteBoxPruningStartTask : public PxLightCpuTask
{
public:
ABP_CompleteBoxPruningStartTask();
virtual const char* getName() const PX_OVERRIDE
{
return "ABP_CompleteBoxPruningStartTask";
}
// Provides the input of the task: bounds, destination pair manager, and 'nb' boxes given as
// split X / YZ lists with their remap table.
void setup(
//ABP_MM& memoryManager,
const PxBounds3& updatedBounds,
ABP_PairManager* PX_RESTRICT pairManager,
PxU32 nb,
const SIMD_AABB_X4* PX_RESTRICT listX,
const SIMD_AABB_YZ4* PX_RESTRICT listYZ,
const ABP_Index* PX_RESTRICT inputRemap,
PxU64 contextID);
void addDelayedPairs();
void addDelayedPairs2(PxArray<BroadPhasePair>& createdPairs);
virtual void run() PX_OVERRIDE;
virtual bool isHighPriority() const PX_OVERRIDE { return true; }
// Input data (see setup())
const SIMD_AABB_X4* mListX;
const SIMD_AABB_YZ4* mListYZ;
const ABP_Index* mInputRemap;
ABP_PairManager* mPairManager;
// Working buffers
PxU32* mRemap;
SIMD_AABB_X4* mBoxListXBuffer;
SIMD_AABB_YZ4* mBoxListYZBuffer;
// Per-bucket data
PxU32 mCounters[NB_BUCKETS];
SIMD_AABB_X4* mBoxListX[NB_BUCKETS];
SIMD_AABB_YZ4* mBoxListYZ[NB_BUCKETS];
PxU32* mRemapBase[NB_BUCKETS];
PxBounds3 mBounds;
PxU32 mNb;
ABP_CompleteBoxPruningTask mTasks[9];
ABP_CompleteBoxPruningEndTask mEndTask;
};
#endif
// Both names are plain aliases of BoxManager: they document intent at the use site, not a distinct type.
typedef BoxManager DynamicManager;
typedef BoxManager StaticManager;
// Aggregates the state of the ABP broadphase: a memory manager, three box managers, a radix sorter,
// shared data and the pair manager, plus the task objects when ABP_MT2 is defined.
// Member functions are defined elsewhere in this file. Non-copyable.
class ABP : public PxUserAllocated
{
PX_NOCOPY(ABP)
public:
ABP(PxU64 contextID);
~ABP();
void preallocate(PxU32 nbObjects, PxU32 maxNbOverlaps);
void reset();
void freeBuffers();
// Object registration, one entry point per object category
void addStaticObjects(const BpHandle* userIDs, PxU32 nb, PxU32 maxID);
void addDynamicObjects(const BpHandle* userIDs, PxU32 nb, PxU32 maxID);
void addKinematicObjects(const BpHandle* userIDs, PxU32 nb, PxU32 maxID);
void removeObject(BpHandle userID);
void updateObject(BpHandle userID);
void findOverlaps(PxBaseTask* continuation, const Bp::FilterGroup::Enum* PX_RESTRICT groups, const bool* PX_RESTRICT lut);
PxU32 finalize(PxArray<BroadPhasePair>& createdPairs, PxArray<BroadPhasePair>& deletedPairs);
void shiftOrigin(const PxVec3& shift, const PxBounds3* boundsArray, const PxReal* contactDistances);
void setTransientData(const PxBounds3* bounds, const PxReal* contactDistance);
void Region_prepareOverlaps();
ABP_MM mMM;
// NOTE(review): presumably mSBM / mDBM / mKBM hold the static / dynamic / kinematic objects respectively,
// matching the three add*Objects() functions above - confirm in their definitions
BoxManager mSBM;
DynamicManager mDBM;
RadixSortBuffered mRS;
DynamicManager mKBM;
ABP_SharedData mShared;
ABP_PairManager mPairManager;
const PxU64 mContextID;
#ifdef ABP_MT2
ABP_InternalTask mTask0;
ABP_InternalTask mTask1;
ABP_CompleteBoxPruningStartTask mCompleteBoxPruningTask0;
ABP_CompleteBoxPruningStartTask mCompleteBoxPruningTask1;
ABP_CompleteBoxPruningTask mBipTasks[NB_BIP_TASKS];
void addDelayedPairs();
void addDelayedPairs2(PxArray<BroadPhasePair>& createdPairs);
#endif
};
// ABP_OVERLAP_TEST(x) expands to the head of an 'if' statement testing box 'x' against the current "box0":
// the statement or block following the macro is executed when the test passes.
// The scalar variant requires a variable named 'box0' in scope at the expansion site.
#ifdef ABP_SIMD_OVERLAP
#define ABP_OVERLAP_TEST(x) SIMD_OVERLAP_TEST(x)
#else
#define ABP_OVERLAP_TEST(x) if(intersect2D(box0, x))
#endif
///////////////////////////////////////////////////////////////////////////////
// Starts with no filtering data (groups, LUT) and no remap tables: all four pointers are NULL until assigned.
ABP_PairManager::ABP_PairManager() : mGroups(NULL), mInToOut0(NULL), mInToOut1(NULL), mLUT(NULL)
{
}
///////////////////////////////////////////////////////////////////////////////
// Nothing to release here: the destructor body is intentionally empty.
ABP_PairManager::~ABP_PairManager() {}
///////////////////////////////////////////////////////////////////////////////
// Translates the two box indices to object IDs through the current remap tables (mInToOut0 / mInToOut1),
// applies group filtering, then registers the pair.
// \return	the result of addPairInternal(), or NULL when the pair is rejected by group filtering
InternalPair* ABP_PairManager::addPair(PxU32 index0, PxU32 index1)
{
	const PxU32 objectID0 = mInToOut0[index0];
	const PxU32 objectID1 = mInToOut1[index1];

	PX_ASSERT(objectID0!=objectID1);
	PX_ASSERT(objectID0!=INVALID_ID);
	PX_ASSERT(objectID1!=INVALID_ID);
	PX_ASSERT(mGroups);

	if(groupFiltering(mGroups[objectID0], mGroups[objectID1], mLUT))
		return addPairInternal(objectID0, objectID1);

	return NULL;
}
#ifdef ABP_MT
// Const flavor of addPair used by the multi-threaded codepaths. The pair is filtered and looked up in the
// shared hash, but a pair that is not found is appended to 'delayedPairs' (IDs + full hash value) instead
// of being inserted.
// \param	delayedPairs	[out] receives the new pair, if any
// \param	inToOut0		[in] remap table for index0
// \param	inToOut1		[in] remap table for index1
// \param	index0			[in] index of first box
// \param	index1			[in] index of second box
void ABP_PairManager::addDelayedPair(PxArray<DelayedPair>& delayedPairs, const ABP_Index* inToOut0, const ABP_Index* inToOut1, PxU32 index0, PxU32 index1) const
{
	// Not const: sort() below reorders them
	PxU32 id0 = inToOut0[index0];
	PxU32 id1 = inToOut1[index1];
	PX_ASSERT(id0!=id1);
	PX_ASSERT(id0!=INVALID_ID);
	PX_ASSERT(id1!=INVALID_ID);
	PX_ASSERT(mGroups);

	if(!groupFiltering(mGroups[id0], mGroups[id1], mLUT))
		return;

	// Order the ids
	sort(id0, id1);

	const PxU32 fullHashValue = hash(id0, id1);
	PxU32 hashValue = fullHashValue & mMask;

	InternalPair* existingPair = findPair(id0, id1, hashValue);
	if(existingPair)
	{
		// Persistent pair
		existingPair->setUpdated();	// ### PT: potential false sharing here
		return;
	}

	// This is a new pair: record it for later insertion, keeping the full hash so that it does not
	// have to be recomputed
	DelayedPair* delayed = Cm::reserveContainerMemory(delayedPairs, 1);
	delayed->mID0 = id0;
	delayed->mID1 = id1;
	delayed->mHash = fullHashValue;
}
// Resizes the hash table, the active-pair array and the 'next' links so that 'nbDelayedPairs' additional
// pairs can be appended without any further growth. The new size is PxNextPowerOfTwo(total pairs + 1).
// Nothing happens when that size equals the current one. Otherwise all three buffers are reallocated,
// existing pairs are copied over and relinked using the new mask.
// \param	nbDelayedPairs	[in] number of pairs about to be added on top of mNbActivePairs
void ABP_PairManager::resizeForNewPairs(PxU32 nbDelayedPairs)
{
	const PxU32 currentNbPairs = mNbActivePairs;
	const PxU32 newNbPairs = currentNbPairs + nbDelayedPairs;
	const PxU32 newHashSize = PxNextPowerOfTwo(newNbPairs + 1);
	if(newHashSize == mHashSize)
		return;

	// Get more entries
	mHashSize = newHashSize;
	mMask = newHashSize - 1;

	// Fresh hash table with every bucket marked empty: a 0xff byte fill sets each PxU32 to 0xffffffff,
	// which is the value the former dword-fill loop wrote as INVALID_ID
	MBP_FREE(mHashTable);
	mHashTable = reinterpret_cast<PxU32*>(MBP_ALLOC(mHashSize*sizeof(PxU32)));
	PxMemSet(mHashTable, 0xff, mHashSize*sizeof(PxU32));

	// Get some bytes for new entries
	InternalPair* newPairs = reinterpret_cast<InternalPair*>(MBP_ALLOC(mHashSize * sizeof(InternalPair)));	PX_ASSERT(newPairs);
	PxU32* newNext = reinterpret_cast<PxU32*>(MBP_ALLOC(mHashSize * sizeof(PxU32)));	PX_ASSERT(newNext);

	// Copy old data if needed
	if(currentNbPairs)
		PxMemCopy(newPairs, mActivePairs, currentNbPairs*sizeof(InternalPair));

	// ### check it's actually needed... probably only for pairs whose hash value was cut by the and
	// yeah, since hash(id0, id1) is a constant
	// However it might not be needed to recompute them => only less efficient but still ok
	// PT: TODO: in heavy scenes like Avalanche100K the number of pairs gets close to a million, and this loop becomes very expensive. Revisit.
	for(PxU32 i=0;i<currentNbPairs;i++)
	{
		const PxU32 hashValue = hash(mActivePairs[i].getId0(), mActivePairs[i].getId1()) & mMask;	// New hash value with new mask
		newNext[i] = mHashTable[hashValue];
		mHashTable[hashValue] = i;
	}

	// Delete old data
	MBP_FREE(mNext);
	MBP_FREE(mActivePairs);

	// Assign new pointer
	mActivePairs = newPairs;
	mNext = newNext;
}
// Appends every pair of 'delayedPairs' to the active-pair array and links it at the head of its hash
// bucket, reusing the hash value stored in each DelayedPair.
// The pair buffers are NOT grown here: they must already be large enough for mNbActivePairs +
// delayedPairs.size() entries (see resizeForNewPairs). This is only checked by an assertion.
// \param	delayedPairs	[in] pairs recorded by addDelayedPair()
void ABP_PairManager::addDelayedPairs(const PxArray<DelayedPair>& delayedPairs)
{
	PxU32 nbDelayedPairs = delayedPairs.size();
	PxU32 currentNbPairs = mNbActivePairs;
	//resizeForNewPairs(nbDelayedPairs);

	// Local copies of the members used in the loop
	const PxU32 mask = mMask;
	PxU32* PX_RESTRICT hashTable = mHashTable;
	PxU32* PX_RESTRICT next = mNext;
	InternalPair* PX_RESTRICT internalPairs = mActivePairs;
	const DelayedPair* PX_RESTRICT pairs = delayedPairs.begin();

	while(nbDelayedPairs--)
	{
		const DelayedPair& dp = *pairs++;
		const PxU32 fullHashValue = dp.mHash;
		const PxU32 hashValue = fullHashValue & mask;

		PX_ASSERT(currentNbPairs < mHashSize);
		const PxU32 pairIndex = currentNbPairs++;

		internalPairs[pairIndex].setNewPair(dp.mID0, dp.mID1);

		// Insert at the head of the bucket's chain
		next[pairIndex] = hashTable[hashValue];
		hashTable[hashValue] = pairIndex;
	}

	mNbActivePairs = currentNbPairs;
}
// Same insertion as addDelayedPairs, except that each pair is initialized with setNewPair2() and is also
// appended to 'createdPairs' (mVolA / mVolB receive the two IDs).
// The pair buffers are NOT grown here: they must already be large enough, which is only checked by an assertion.
// \param	createdPairs	[out] receives one BroadPhasePair per delayed pair
// \param	delayedPairs	[in] pairs recorded by addDelayedPair()
void ABP_PairManager::addDelayedPairs2(PxArray<BroadPhasePair>& createdPairs, const PxArray<DelayedPair>& delayedPairs)
{
	const PxU32 nbToAdd = delayedPairs.size();
	PxU32 pairIndex = mNbActivePairs;	// next free slot in the active-pair array
	//resizeForNewPairs(nbDelayedPairs);

	BroadPhasePair* created = Cm::reserveContainerMemory(createdPairs, nbToAdd);

	const PxU32 mask = mMask;
	PxU32* PX_RESTRICT hashTable = mHashTable;
	PxU32* PX_RESTRICT next = mNext;
	InternalPair* PX_RESTRICT internalPairs = mActivePairs;
	const DelayedPair* PX_RESTRICT source = delayedPairs.begin();

	for(PxU32 i=0; i<nbToAdd; i++, pairIndex++)
	{
		const DelayedPair& dp = source[i];
		const PxU32 bucket = dp.mHash & mask;

		PX_ASSERT(pairIndex < mHashSize);

		internalPairs[pairIndex].setNewPair2(dp.mID0, dp.mID1);

		// Report the pair as created
		created[i].mVolA = dp.mID0;
		created[i].mVolB = dp.mID1;

		// Insert at the head of the bucket's chain
		next[pairIndex] = hashTable[bucket];
		hashTable[bucket] = pairIndex;
	}

	mNbActivePairs = pairIndex;
}
#endif
///////////////////////////////////////////////////////////////////////////////
// SIMD helpers for the 2D (YZ) overlap test. All of them rely on a variable named 'b' that is declared by
// SIMD_OVERLAP_INIT_9c at the expansion site:
// - SIMD_OVERLAP_INIT_9c(box): loads the four floats starting at box.mMinY, swaps the lower and upper pairs
//   of lanes and negates all lanes, storing the result in a new variable 'b'
// - SIMD_OVERLAP_TEST_9c(box): declares 'a' and 'd', and expands to the head of an 'if' statement that passes
//   when the four floats starting at box.mMinY are all >= 'b'
// - SIMD_OVERLAP_TEST_14a(box): expression taking a pointer to four aligned floats, true when no lane of 'b'
//   is greater than the corresponding lane of 'box'
// NOTE(review): this scheme presumably relies on the min Y/Z values being stored negated in SIMD_AABB_YZ4 when
// ABP_SIMD_OVERLAP is defined, and on the member order being minY, minZ, maxY, maxZ - confirm against the struct
#if PX_INTEL_FAMILY
// SSE flavor. _mm_cmpngt_ps sets a lane when NOT(b > box), and a movemask of 15 means all four lanes are set.
#define SIMD_OVERLAP_TEST_14a(box) _mm_movemask_ps(_mm_cmpngt_ps(b, _mm_load_ps(box)))==15
// Shuffle immediate 78 (binary 01 00 11 10) selects lanes (2, 3, 0, 1)
#define SIMD_OVERLAP_INIT_9c(box) \
__m128 b = _mm_shuffle_ps(_mm_load_ps(&box.mMinY), _mm_load_ps(&box.mMinY), 78);\
const float Coeff = -1.0f;\
b = _mm_mul_ps(b, _mm_load1_ps(&Coeff));
#define SIMD_OVERLAP_TEST_9c(box) \
const __m128 a = _mm_load_ps(&box.mMinY); \
const __m128 d = _mm_cmpge_ps(a, b); \
if(_mm_movemask_ps(d)==15)
#else
// Portable flavor written with the PhysX vector math wrappers
#define SIMD_OVERLAP_TEST_14a(box) BAllEqFFFF(V4IsGrtr(b, V4LoadA(box)))
#define SIMD_OVERLAP_INIT_9c(box) \
Vec4V b = V4PermZWXY(V4LoadA(&box.mMinY)); \
b = V4Mul(b, V4Load(-1.0f));
#define SIMD_OVERLAP_TEST_9c(box) \
const Vec4V a = V4LoadA(&box.mMinY); \
const Vec4V d = V4IsGrtrOrEq(a, b); \
if(BAllEqTTTT(d))
#endif
// SIMD_OVERLAP_PRELOAD_BOX0 prepares 'b' from a variable named 'box0'. It expands to nothing in scalar builds.
#ifdef ABP_SIMD_OVERLAP
#define SIMD_OVERLAP_PRELOAD_BOX0 SIMD_OVERLAP_INIT_9c(box0)
#define SIMD_OVERLAP_TEST(x) SIMD_OVERLAP_TEST_9c(x)
#else
#define SIMD_OVERLAP_PRELOAD_BOX0
#endif
#ifndef ABP_SIMD_OVERLAP
// Scalar 2D overlap test on the Y and Z axes.
// \return	1 if the two boxes overlap on both axes (touching counts as overlapping), 0 otherwise
static PX_FORCE_INLINE int intersect2D(const SIMD_AABB_YZ4& a, const SIMD_AABB_YZ4& b)
{
	// The four comparisons are combined with bitwise ORs rather than logical ones, so that all of them
	// are evaluated without short-circuit branches.
	const bool separatedOnY = (b.mMaxY < a.mMinY) | (a.mMaxY < b.mMinY);
	const bool separatedOnZ = (b.mMaxZ < a.mMinZ) | (a.mMaxZ < b.mMinZ);
	const bool separated = separatedOnY | separatedOnZ;
	return !separated;
}
#endif
// Single-threaded flavor of outputPair: registers the overlapping pair (index0, index1) directly in the
// pair manager. The pointer returned by addPair() is not needed here.
static PX_FORCE_INLINE void outputPair(ABP_PairManager& pairManager, PxU32 index0, PxU32 index1)
{
	(void)pairManager.addPair(index0, index1);
}
// One directional pass of the bipartite sweep: for each box of list 0 (in order), reports the boxes of list 1
// whose min-X lies within the X range of the list-0 box and that overlap it on Y and Z.
// Both lists are walked with a single forward-only cursor on list 1 (runningIndex1), so they are expected to
// be sorted by increasing min-X.
// 'codepath' selects the comparison used to advance the cursor: '<' for 0, '<=' for 1. The caller in this file
// runs <0> on (list0, list1) and <1> on (list1, list0).
// NOTE(review): the asymmetry presumably prevents a pair with identical min-X values from being reported by both passes - confirm
// The inner loops have no bounds checks on list 1: they rely on the sentinel entries stored after the last box
// (the caller asserts their presence) to stop.
// \param	nb0, nb1				[in] number of boxes in each list (sentinels excluded)
// \param	boxes0_X, boxes1_X		[in] X extents of each list
// \param	boxes0_YZ, boxes1_YZ	[in] YZ extents of each list
// \param	inToOut0, inToOut1		[in] remap tables, stored in the pair manager before the sweep
// \param	pairManager				[in/out] receives the pairs through outputPair()
template<const int codepath, class ABP_PairManagerT>
static void boxPruningKernel( PxU32 nb0, PxU32 nb1,
const SIMD_AABB_X4* PX_RESTRICT boxes0_X, const SIMD_AABB_X4* PX_RESTRICT boxes1_X,
const SIMD_AABB_YZ4* PX_RESTRICT boxes0_YZ, const SIMD_AABB_YZ4* PX_RESTRICT boxes1_YZ,
const ABP_Index* PX_RESTRICT inToOut0, const ABP_Index* PX_RESTRICT inToOut1,
ABP_PairManagerT* PX_RESTRICT pairManager)
{
pairManager->mInToOut0 = inToOut0;
pairManager->mInToOut1 = inToOut1;
PxU32 index0 = 0;
PxU32 runningIndex1 = 0;
while(runningIndex1<nb1 && index0<nb0)
{
const SIMD_AABB_X4& box0_X = boxes0_X[index0];
const PosXType2 maxLimit = box0_X.mMaxX;
const PosXType2 minLimit = box0_X.mMinX;
// Skip the list-1 boxes starting before (codepath 0) or not after (codepath 1) the current box.
// The cursor is never rewound.
if(!codepath)
{
while(boxes1_X[runningIndex1].mMinX<minLimit)
runningIndex1++;
}
else
{
while(boxes1_X[runningIndex1].mMinX<=minLimit)
runningIndex1++;
}
const SIMD_AABB_YZ4& box0 = boxes0_YZ[index0];
SIMD_OVERLAP_PRELOAD_BOX0
if(gUseRegularBPKernel)
{
// Straightforward version: test every candidate starting within the X range of box0
PxU32 index1 = runningIndex1;
while(boxes1_X[index1].mMinX<=maxLimit)
{
ABP_OVERLAP_TEST(boxes1_YZ[index1])
{
outputPair(*pairManager, index0, index1);
}
index1++;
}
}
else
{
// Byte-offset version. 'Offset' is a byte offset in the X array and advances by 8 per box; the matching
// YZ entry is at Offset*2, and the box index is recovered with a shift by 3. This implies 8-byte X
// entries and 16-byte YZ entries.
PxU32 Offset = 0;
const char* const CurrentBoxListYZ = reinterpret_cast<const char*>(&boxes1_YZ[runningIndex1]);
const char* const CurrentBoxListX = reinterpret_cast<const char*>(&boxes1_X[runningIndex1]);
if(!gUnrollLoop)
{
while(*reinterpret_cast<const PosXType2*>(CurrentBoxListX + Offset)<=maxLimit)
{
const float* box = reinterpret_cast<const float*>(CurrentBoxListYZ + Offset*2);
#ifdef ABP_SIMD_OVERLAP
if(SIMD_OVERLAP_TEST_14a(box))
#else
if(intersect2D(box0, *reinterpret_cast<const SIMD_AABB_YZ4*>(box)))
#endif
{
const PxU32 Index1 = PxU32(CurrentBoxListX + Offset - reinterpret_cast<const char*>(boxes1_X))>>3;
outputPair(*pairManager, index0, Index1);
}
Offset += 8;
}
}
else
{
// Unrolled version, in two stages:
// 1) blocks of five boxes, as long as the box located five entries ahead still starts within the range
//    (the first five are then within the range as well, so their X values are not re-tested)
// 2) remaining boxes one at a time, testing X for each
#define BIP_VERSION4
#ifdef BIP_VERSION4
// BLOCK4(x, label): tests the YZ overlap of the box located x bytes after Offset, jumps to 'label' on overlap
#ifdef ABP_SIMD_OVERLAP
#define BLOCK4(x, label) {const float* box = reinterpret_cast<const float*>(CurrentBoxListYZ + Offset*2 + x*2); \
if(SIMD_OVERLAP_TEST_14a(box)) \
goto label; }
#else
#define BLOCK4(x, label) {const float* box = reinterpret_cast<const float*>(CurrentBoxListYZ + Offset*2 + x*2); \
if(intersect2D(box0, *reinterpret_cast<const SIMD_AABB_YZ4*>(box))) \
goto label; }
#endif
goto StartLoop4;
// Fall-through chain: entering at FoundOverlapN advances Offset to just past the overlapping box, so that
// the pair is reported for "Offset - 8" and the loop resumes with the box following it
CODEALIGN16
FoundOverlap3:
Offset += 8;
CODEALIGN16
FoundOverlap2:
Offset += 8;
CODEALIGN16
FoundOverlap1:
Offset += 8;
CODEALIGN16
FoundOverlap0:
Offset += 8;
CODEALIGN16
FoundOverlap:
{
const PxU32 Index1 = PxU32(CurrentBoxListX + Offset - 8 - reinterpret_cast<const char*>(boxes1_X))>>3;
outputPair(*pairManager, index0, Index1);
}
CODEALIGN16
StartLoop4:
while(*reinterpret_cast<const PosXType2*>(CurrentBoxListX + Offset + 8*5)<=maxLimit)
{
BLOCK4(0, FoundOverlap0)
BLOCK4(8, FoundOverlap1)
BLOCK4(16, FoundOverlap2)
BLOCK4(24, FoundOverlap3)
// Offset is advanced before the fifth test, hence the -8 here and in FoundOverlap
Offset += 40;
BLOCK4(-8, FoundOverlap)
}
#undef BLOCK4
#endif
// BLOCK: tests one box (X first, then YZ) and advances Offset when there is no overlap. The macro leaves
// one brace open: the three BLOCKs below are nested and closed by the braces that follow them.
#ifdef ABP_SIMD_OVERLAP
#define BLOCK if(*reinterpret_cast<const PosXType2*>(CurrentBoxListX + Offset)<=maxLimit) \
{if(SIMD_OVERLAP_TEST_14a(reinterpret_cast<const float*>(CurrentBoxListYZ + Offset*2))) \
goto OverlapFound; \
Offset += 8;
#else
#define BLOCK if(*reinterpret_cast<const PosXType2*>(CurrentBoxListX + Offset)<=maxLimit) \
{if(intersect2D(box0, *reinterpret_cast<const SIMD_AABB_YZ4*>(CurrentBoxListYZ + Offset*2))) \
goto OverlapFound; \
Offset += 8;
#endif
goto LoopStart;
CODEALIGN16
OverlapFound:
{
const PxU32 Index1 = PxU32(CurrentBoxListX + Offset - reinterpret_cast<const char*>(boxes1_X))>>3;
outputPair(*pairManager, index0, Index1);
}
Offset += 8;
CODEALIGN16
LoopStart:
BLOCK
BLOCK
BLOCK
}
}
// Reached when three boxes in a row were within the range without overlapping: keep going
goto LoopStart;
}
#undef BLOCK
}
}
index0++;
}
}
// Bipartite box pruning between two lists of boxes, given as raw split arrays.
// The sweep is run twice with the roles of the two lists exchanged: once with codepath 0 and list 0 driving,
// once with codepath 1 and list 1 driving. Both lists must be terminated by a sentinel entry.
// \param	pairManager				[in/out] receives the overlapping pairs
// \param	nb0, nb1				[in] number of boxes in each list (sentinels excluded)
// \param	boxes0_X, boxes1_X		[in] X extents of each list
// \param	boxes0_YZ, boxes1_YZ	[in] YZ extents of each list
// \param	remap0, remap1			[in] remap tables of each list
template<class ABP_PairManagerT>
static /*PX_FORCE_INLINE*/ void doBipartiteBoxPruning_Leaf(
	ABP_PairManagerT* PX_RESTRICT pairManager,
	PxU32 nb0, PxU32 nb1,
	const SIMD_AABB_X4* PX_RESTRICT boxes0_X, const SIMD_AABB_X4* PX_RESTRICT boxes1_X,
	const SIMD_AABB_YZ4* PX_RESTRICT boxes0_YZ, const SIMD_AABB_YZ4* PX_RESTRICT boxes1_YZ,
	const ABP_Index* PX_RESTRICT remap0, const ABP_Index* PX_RESTRICT remap1)
{
	// The kernels do not bound-check their inner loops, they stop on the sentinels
	PX_ASSERT(boxes0_X[nb0].isSentinel());
	PX_ASSERT(boxes1_X[nb1].isSentinel());

	// List 0 against list 1
	boxPruningKernel<0>(	nb0, nb1,
							boxes0_X, boxes1_X,
							boxes0_YZ, boxes1_YZ,
							remap0, remap1,
							pairManager);

	// List 1 against list 0
	boxPruningKernel<1>(	nb1, nb0,
							boxes1_X, boxes0_X,
							boxes1_YZ, boxes0_YZ,
							remap1, remap0,
							pairManager);
}
// Convenience overload: unpacks the X and YZ arrays of two SplitBoxes containers and forwards
// everything to the raw-array version of doBipartiteBoxPruning_Leaf.
template<class ABP_PairManagerT>
static PX_FORCE_INLINE void doBipartiteBoxPruning_Leaf(ABP_PairManagerT* PX_RESTRICT pairManager,
	PxU32 nb0, PxU32 nb1, const SplitBoxes& boxes0, const SplitBoxes& boxes1, const ABP_Index* PX_RESTRICT remap0, const ABP_Index* PX_RESTRICT remap1)
{
	const SIMD_AABB_X4* PX_RESTRICT x0 = boxes0.getBoxes_X();
	const SIMD_AABB_X4* PX_RESTRICT x1 = boxes1.getBoxes_X();
	const SIMD_AABB_YZ4* PX_RESTRICT yz0 = boxes0.getBoxes_YZ();
	const SIMD_AABB_YZ4* PX_RESTRICT yz1 = boxes1.getBoxes_YZ();

	doBipartiteBoxPruning_Leaf(pairManager, nb0, nb1, x0, x1, yz0, yz1, remap0, remap1);
}
// Finds overlapping pairs within a single list of boxes ("complete" pruning) and reports each of them
// through outputPair(), with both indices referring to the same list.
// - pairManager: receives the pairs; its mInToOut0 and mInToOut1 members are both pointed at 'remap' first.
// - nb: number of boxes to process.
// - boxes_X / boxes_YZ: parallel arrays holding the X extents and the YZ extents of each box.
// - remap: index table stored in the pair manager (presumably used by outputPair to translate list
//   indices - confirm).
// NOTE(review): the candidate scans below are bounded only by X comparisons, not by 'nb'. They appear to
// rely on boxes_X being sorted by mMinX and followed by sentinel entries (callers in this file write
// NB_SENTINELS sentinels after each list) - confirm.
template<class ABP_PairManagerT>
static void doCompleteBoxPruning_Leaf( ABP_PairManagerT* PX_RESTRICT pairManager, PxU32 nb,
const SIMD_AABB_X4* PX_RESTRICT boxes_X,
const SIMD_AABB_YZ4* PX_RESTRICT boxes_YZ,
const ABP_Index* PX_RESTRICT remap)
{
// Same list on both sides of every pair, hence the same remap table for both.
pairManager->mInToOut0 = remap;
pairManager->mInToOut1 = remap;
// index0 is the current box; runningIndex is the start of its candidate range and never moves backwards.
PxU32 index0 = 0;
PxU32 runningIndex = 0;
while(runningIndex<nb && index0<nb)
{
const SIMD_AABB_X4& box0_X = boxes_X[index0];
const PosXType2 maxLimit = box0_X.mMaxX;
const PosXType2 minLimit = box0_X.mMinX;
// Skip entries whose mMinX is below box0's mMinX. Because of the post-increment, runningIndex ends up
// one past the first entry that fails the test.
while(boxes_X[runningIndex++].mMinX<minLimit);
const SIMD_AABB_YZ4& box0 = boxes_YZ[index0];
// NOTE(review): presumably loads box0 for the SIMD overlap tests used below - confirm in the macro definition.
SIMD_OVERLAP_PRELOAD_BOX0
if(gUseRegularBPKernel)
{
// Plain indexed scan: visit candidates while their mMinX does not exceed box0's mMaxX.
PxU32 index1 = runningIndex;
while(boxes_X[index1].mMinX<=maxLimit)
{
// NOTE(review): ABP_OVERLAP_TEST presumably expands to the YZ overlap 'if' guarding the block below.
ABP_OVERLAP_TEST(boxes_YZ[index1])
{
outputPair(*pairManager, index0, index1);
}
index1++;
}
}
else
{
// Byte-offset scan. Offset is a byte offset into the X list and advances by 8 per box; the matching
// YZ entry is addressed at Offset*2. Box indices are recovered from the byte distance to boxes_X, >>3.
PxU32 Offset = 0;
const char* const CurrentBoxListYZ = reinterpret_cast<const char*>(&boxes_YZ[runningIndex]);
const char* const CurrentBoxListX = reinterpret_cast<const char*>(&boxes_X[runningIndex]);
if(!gUnrollLoop)
{
// Non-unrolled version: one X test and one YZ test per iteration.
while(*reinterpret_cast<const PosXType2*>(CurrentBoxListX + Offset)<=maxLimit)
{
const float* box = reinterpret_cast<const float*>(CurrentBoxListYZ + Offset*2);
#ifdef ABP_SIMD_OVERLAP
if(SIMD_OVERLAP_TEST_14a(box))
#else
if(intersect2D(box0, *reinterpret_cast<const SIMD_AABB_YZ4*>(box)))
#endif
{
const PxU32 Index = PxU32(CurrentBoxListX + Offset - reinterpret_cast<const char*>(boxes_X))>>3;
outputPair(*pairManager, index0, Index);
}
Offset += 8;
}
}
else
{
// Unrolled version, in two stages: a 5-box batch loop (VERSION4c) followed by a per-box loop (VERSION3)
// that handles whatever the batch loop left over.
#define VERSION4c
#ifdef VERSION4c
#define VERSION3 // Enable this as our safe loop
// BLOCK4(x, label): YZ-test the box located x bytes after Offset in the X list, jump to 'label' on overlap.
#ifdef ABP_SIMD_OVERLAP
#define BLOCK4(x, label) {const float* box = reinterpret_cast<const float*>(CurrentBoxListYZ + Offset*2 + x*2); \
if(SIMD_OVERLAP_TEST_14a(box)) \
goto label; }
#else
#define BLOCK4(x, label) {const SIMD_AABB_YZ4* box = reinterpret_cast<const SIMD_AABB_YZ4*>(CurrentBoxListYZ + Offset*2 + x*2); \
if(intersect2D(box0, *box)) \
goto label; }
#endif
goto StartLoop4;
// Overlap handlers. The labels fall through into each other so that, whichever box of the batch hit,
// Offset ends up 8 bytes past that box by the time FoundOverlap is reached.
CODEALIGN16
FoundOverlap3:
Offset += 8;
CODEALIGN16
FoundOverlap2:
Offset += 8;
CODEALIGN16
FoundOverlap1:
Offset += 8;
CODEALIGN16
FoundOverlap0:
Offset += 8;
CODEALIGN16
FoundOverlap:
{
// Offset is already past the overlapping box, hence the -8.
const PxU32 Index = PxU32(CurrentBoxListX + Offset - 8 - reinterpret_cast<const char*>(boxes_X))>>3;
outputPair(*pairManager, index0, Index);
}
CODEALIGN16
StartLoop4:
// Only the X value five entries ahead of Offset is tested here; the five boxes of the batch are then
// YZ-tested without individual X tests (relies on the list being sorted by mMinX - see function header).
while(*reinterpret_cast<const PosXType2*>(CurrentBoxListX + Offset + 8*5)<=maxLimit)
{
BLOCK4(0, FoundOverlap0)
BLOCK4(8, FoundOverlap1)
BLOCK4(16, FoundOverlap2)
BLOCK4(24, FoundOverlap3)
Offset += 40;
// Fifth box of the batch, addressed relative to the already-advanced Offset.
BLOCK4(-8, FoundOverlap)
}
#endif
#define VERSION3
#ifdef VERSION3
// BLOCK: X-test the box at Offset; if it passes, YZ-test it (jump to BeforeLoop on overlap) and advance.
// Each BLOCK opens a brace that is closed by the '}' lines following the BLOCK sequence below.
#ifdef ABP_SIMD_OVERLAP
#define BLOCK if(*reinterpret_cast<const PosXType2*>(CurrentBoxListX + Offset)<=maxLimit) \
{if(SIMD_OVERLAP_TEST_14a(reinterpret_cast<const float*>(CurrentBoxListYZ + Offset*2))) \
goto BeforeLoop; \
Offset += 8;
#else
#define BLOCK if(*reinterpret_cast<const PosXType2*>(CurrentBoxListX + Offset)<=maxLimit) \
{if(intersect2D(box0, *reinterpret_cast<const SIMD_AABB_YZ4*>(CurrentBoxListYZ + Offset*2))) \
goto BeforeLoop; \
Offset += 8;
#endif
goto StartLoop;
CODEALIGN16
BeforeLoop:
{
// Here Offset still addresses the overlapping box: report it, then step past it.
const PxU32 Index = PxU32(CurrentBoxListX + Offset - reinterpret_cast<const char*>(boxes_X))>>3;
outputPair(*pairManager, index0, Index);
Offset += 8;
}
CODEALIGN16
StartLoop:
// Five nested X/YZ tests. The loop repeats as long as the first X test passes and exits when it fails.
BLOCK
BLOCK
BLOCK
BLOCK
BLOCK
}
}
}
}
// Still inside the first BLOCK's brace: its X test passed, so keep scanning.
goto StartLoop;
}
#endif
}
}
index0++;
}
}
#ifdef USE_ABP_BUCKETS
// Bucket lookup table used by classifyBoxNew. The index is a 4-bit code:
//   bit 0 = rightPart, bit 1 = leftPart, bit 2 = upperPart, bit 3 = lowerPart.
// Values 0..3 are the four quadrant buckets, 4 is the bucket for boxes that are not fully inside one
// quadrant, and 255 marks contradictory codes (left & right, or upper & lower, set together).
static const PxU8 gCodes[] = {
	4,		// 0000: no flag set
	4,		// 0001: right only
	4,		// 0010: left only
	255,	// 0011: left + right (invalid)
	4,		// 0100: upper only
	3,		// 0101: upper + right
	2,		// 0110: upper + left
	255,	// 0111: invalid
	4,		// 1000: lower only
	1,		// 1001: lower + right
	0,		// 1010: lower + left
	255,	// 1011: invalid
	255,	// 1100: upper + lower (invalid)
	255,	// 1101: invalid
	255,	// 1110: invalid
	255		// 1111: invalid
};
// Returns the bucket index of a box, given the Y and Z split planes.
// - boxYZ: YZ extents of the box (mins are stored negated when ABP_SIMD_OVERLAP is defined).
// - limitY / limitZ: positions of the split planes.
// The result is read from gCodes; the assert rejects the table's 255 (invalid) entries.
static PX_FORCE_INLINE PxU8 classifyBoxNew(const SIMD_AABB_YZ4& boxYZ, const float limitY, const float limitZ)
{
#ifdef ABP_SIMD_OVERLAP
	// PT: mins have been negated for SIMD tests
	const float minY = -boxYZ.mMinY;
	const float minZ = -boxYZ.mMinZ;
#else
	const float minY = boxYZ.mMinY;
	const float minZ = boxYZ.mMinZ;
#endif

	// Two bits per axis: bit 0 = box entirely above the limit, bit 1 = box entirely below it.
	const PxU32 yBits = PxU32(minY > limitY) | (PxU32(boxYZ.mMaxY < limitY)<<1);
	const PxU32 zBits = PxU32(minZ > limitZ) | (PxU32(boxYZ.mMaxZ < limitZ)<<1);

	// Table-based box classification avoids many branches
	const PxU32 code = yBits | (zBits<<2);
	PX_ASSERT(gCodes[code]!=255);
	return gCodes[code];
}
#ifdef RECURSE_LIMIT
// Recursive bucketed pruning (only compiled when RECURSE_LIMIT is defined).
// Splits the input boxes into NB_BUCKETS buckets around the center of their merged YZ bounds, prunes each
// bucket against itself (recursing into buckets that hold at least RECURSE_LIMIT boxes), then prunes every
// other bucket against the last one.
// - memoryManager: provides the temporary frame allocations; they are released in reverse allocation order.
// - pairManager: receives the overlapping pairs.
// - nb: number of input boxes; listX / listYZ / remap are parallel arrays of that size.
// - objects: legacy parameter. It is only forwarded to the recursive calls; the leaf functions no longer
//   accept it. It defaults to NULL so that callers which do not pass it still compile.
// Fix over the previous revision: the leaf calls passed a stale extra 'objects' argument that does not match
// the signatures of doCompleteBoxPruning_Leaf / doBipartiteBoxPruning_Leaf.
static void CompleteBoxPruning_Recursive(
	ABP_MM& memoryManager,
	ABP_PairManager* PX_RESTRICT pairManager,
	PxU32 nb,
	const SIMD_AABB_X4* PX_RESTRICT listX,
	const SIMD_AABB_YZ4* PX_RESTRICT listYZ,
	const ABP_Index* PX_RESTRICT remap,
	const ABPEntry* PX_RESTRICT objects = NULL)
{
//	printf("CompleteBoxPruning_Recursive %d\n", nb);
	if(!nb)
		return;

	// Merge the YZ bounds of all boxes with a single running SIMD max.
	// NOTE(review): negating the first two lanes below assumes mins are stored negated (the
	// ABP_SIMD_OVERLAP layout), so that a max over all four lanes yields (-minY, -minZ, maxY, maxZ) - confirm.
	float limitY;
	float limitZ;
	{
		Vec4V maxV = V4LoadA(&listYZ[0].mMinY);
		for(PxU32 i=1;i<nb;i++)
			maxV = V4Max(maxV, V4LoadA(&listYZ[i].mMinY));

		PX_ALIGN(16, PxVec4) tmp;
		V4StoreA(maxV, &tmp.x);

		const float mergedMinY = -tmp.x;
		const float mergedMinZ = -tmp.y;
		const float mergedMaxY = tmp.z;
		const float mergedMaxZ = tmp.w;

		// Split planes go through the center of the merged bounds.
		limitY = (mergedMaxY + mergedMinY) * 0.5f;
		limitZ = (mergedMaxZ + mergedMinZ) * 0.5f;
	}

	// PT: TODO: revisit allocs
	// The X buffer also holds NB_SENTINELS sentinel entries per bucket.
	SIMD_AABB_X4* BoxListXBuffer = reinterpret_cast<SIMD_AABB_X4*>(memoryManager.frameAlloc(sizeof(SIMD_AABB_X4)*(nb+NB_SENTINELS*NB_BUCKETS)));
	SIMD_AABB_YZ4* BoxListYZBuffer = reinterpret_cast<SIMD_AABB_YZ4*>(memoryManager.frameAlloc(sizeof(SIMD_AABB_YZ4)*nb));

	PxU32 Counters[NB_BUCKETS];
	for(PxU32 i=0;i<NB_BUCKETS;i++)
		Counters[i] = 0;

	PxU32* Remap = reinterpret_cast<PxU32*>(memoryManager.frameAlloc(sizeof(PxU32)*nb));
	PxU8* Indices = reinterpret_cast<PxU8*>(memoryManager.frameAlloc(sizeof(PxU8)*nb));

	// Pass 1: classify every box and count the population of each bucket.
	for(PxU32 i=0;i<nb;i++)
	{
		const PxU8 index = classifyBoxNew(listYZ[i], limitY, limitZ);
		Indices[i] = index;
		Counters[index]++;
	}

	// Carve the shared buffers into one contiguous range per bucket.
	SIMD_AABB_X4* BoxListX[NB_BUCKETS];
	SIMD_AABB_YZ4* BoxListYZ[NB_BUCKETS];
	PxU32* RemapBase[NB_BUCKETS];
	{
		SIMD_AABB_X4* CurrentBoxListXBuffer = BoxListXBuffer;
		SIMD_AABB_YZ4* CurrentBoxListYZBuffer = BoxListYZBuffer;
		PxU32* CurrentRemap = Remap;
		for(PxU32 i=0;i<NB_BUCKETS;i++)
		{
			const PxU32 Nb = Counters[i];
			BoxListX[i] = CurrentBoxListXBuffer;
			BoxListYZ[i] = CurrentBoxListYZBuffer;
			RemapBase[i] = CurrentRemap;
			CurrentBoxListXBuffer += Nb+NB_SENTINELS;
			CurrentBoxListYZBuffer += Nb;
			CurrentRemap += Nb;
		}
		PX_ASSERT(CurrentBoxListXBuffer == BoxListXBuffer + nb + NB_SENTINELS*NB_BUCKETS);
		PX_ASSERT(CurrentBoxListYZBuffer == BoxListYZBuffer + nb);
		PX_ASSERT(CurrentRemap == Remap + nb);
	}

	// Pass 2: scatter the boxes into their buckets. The counters are reused as per-bucket write cursors,
	// and boxes keep their relative input order inside each bucket.
	for(PxU32 i=0;i<NB_BUCKETS;i++)
		Counters[i] = 0;

	for(PxU32 i=0;i<nb;i++)
	{
		const PxU32 TargetBucket = PxU32(Indices[i]);
		const PxU32 IndexInTarget = Counters[TargetBucket]++;

		RemapBase[TargetBucket][IndexInTarget] = remap[i];
		BoxListX[TargetBucket][IndexInTarget] = listX[i];
		BoxListYZ[TargetBucket][IndexInTarget] = listYZ[i];
	}
	memoryManager.frameFree(Indices);

	// Terminate every bucket's X list with sentinels.
	for(PxU32 i=0;i<NB_BUCKETS;i++)
	{
		SIMD_AABB_X4* TargetBoxListX = BoxListX[i];
		const PxU32 IndexInTarget = Counters[i];
		for(PxU32 j=0;j<NB_SENTINELS;j++)
			TargetBoxListX[IndexInTarget+j].initSentinel();
	}

	// Each bucket against itself. A bucket that received all the input boxes is sent to the leaf function
	// even if it is large, since splitting it again would make no progress.
	{
		const PxU32 limit = RECURSE_LIMIT;
		for(PxU32 i=0;i<NB_BUCKETS;i++)
		{
			if(Counters[i]<limit || Counters[i]==nb)
				doCompleteBoxPruning_Leaf(	pairManager,
											Counters[i],
											BoxListX[i], BoxListYZ[i],
											RemapBase[i]);
			else
				CompleteBoxPruning_Recursive(memoryManager, pairManager,
											Counters[i],
											BoxListX[i], BoxListYZ[i],
											RemapBase[i],
											objects);
		}
	}

	// Every other bucket against the last bucket.
	{
		for(PxU32 i=0;i<NB_BUCKETS-1;i++)
		{
			doBipartiteBoxPruning_Leaf(pairManager,
				Counters[i], Counters[NB_BUCKETS-1],
				BoxListX[i], BoxListX[NB_BUCKETS-1], BoxListYZ[i], BoxListYZ[NB_BUCKETS-1],
				RemapBase[i], RemapBase[NB_BUCKETS-1]
				);
		}
	}

	memoryManager.frameFree(Remap);
	memoryManager.frameFree(BoxListYZBuffer);
	memoryManager.frameFree(BoxListXBuffer);
}
#endif
#ifdef ABP_MT2
// Task entry point. mType==0 runs a complete pruning of the task's own box list; any other value runs a
// bipartite pruning of that list against the secondary ("4"-suffixed) list. Pairs go to the task's mPairs.
void ABP_CompleteBoxPruningTask::run()
{
	if(mType==0)
	{
		doCompleteBoxPruning_Leaf(&mPairs, mCounter, mBoxListX, mBoxListYZ, mRemap);
		return;
	}

	doBipartiteBoxPruning_Leaf(&mPairs,
		mCounter, mCounter4,
		mBoxListX, mBoxListX4,
		mBoxListYZ, mBoxListYZ4,
		mRemap, mRemap4);
}
void ABP_CompleteBoxPruningEndTask::run()
{
// printf("Running ABP_CompleteBoxPruningEndTask\n");
//memoryManager.frameFree(Remap);
//memoryManager.frameFree(BoxListYZBuffer);
//memoryManager.frameFree(BoxListXBuffer);
// PT: TODO: revisit allocs
PX_FREE(mStartTask->mRemap);
PX_FREE(mStartTask->mBoxListYZBuffer);
PX_FREE(mStartTask->mBoxListXBuffer);
}
// Starts with no input lists, no pair manager, no buffers and a box count of zero. The real values are
// provided later by setup().
ABP_CompleteBoxPruningStartTask::ABP_CompleteBoxPruningStartTask()
	: mListX			(NULL)
	, mListYZ			(NULL)
	, mInputRemap		(NULL)
	, mPairManager		(NULL)
	, mRemap			(NULL)
	, mBoxListXBuffer	(NULL)
	, mBoxListYZBuffer	(NULL)
	, mNb				(0)
{
}
// Records the inputs of the multi-threaded complete pruning and allocates its working buffers.
// - updatedBounds: bounds whose Y/Z center is used by run() to split the boxes into buckets.
// - pairManager: shared pair manager the sub-tasks will report to.
// - nb: number of input boxes; listX / listYZ / inputRemap are the corresponding parallel arrays.
// - contextID: profiler context.
// The buffers allocated here are released by ABP_CompleteBoxPruningEndTask::run.
void ABP_CompleteBoxPruningStartTask::setup(
	//ABP_MM& memoryManager,
	const PxBounds3& updatedBounds,
	ABP_PairManager* PX_RESTRICT pairManager,
	PxU32 nb,
	const SIMD_AABB_X4* PX_RESTRICT listX,
	const SIMD_AABB_YZ4* PX_RESTRICT listYZ,
	const ABP_Index* PX_RESTRICT inputRemap,
	PxU64 contextID)
{
	// Inputs
	mNb				= nb;
	mListX			= listX;
	mListYZ			= listYZ;
	mInputRemap		= inputRemap;
	mBounds			= updatedBounds;
	mPairManager	= pairManager;
	mContextID		= contextID;

	// PT: TODO: revisit allocs
	// Working buffers. The X buffer has room for NB_SENTINELS extra entries per bucket.
	mBoxListXBuffer = reinterpret_cast<SIMD_AABB_X4*>(PX_ALLOC(sizeof(SIMD_AABB_X4)*(nb+NB_SENTINELS*NB_BUCKETS), "mBoxListXBuffer"));
	mBoxListYZBuffer = reinterpret_cast<SIMD_AABB_YZ4*>(PX_ALLOC(sizeof(SIMD_AABB_YZ4)*nb, "mBoxListYZBuffer"));
	mRemap = reinterpret_cast<PxU32*>(PX_ALLOC(sizeof(PxU32)*nb, "mRemap"));

	// Give the end task and the nine sub-tasks a pointer back to this start task.
	mEndTask.mStartTask = this;
	for(PxU32 taskIndex=0; taskIndex<9; taskIndex++)
		mTasks[taskIndex].mStartTask = this;
}
void ABP_CompleteBoxPruningStartTask::run()
{
// printf("Running ABP_CompleteBoxPruningStartTask\n");
const SIMD_AABB_X4* PX_RESTRICT listX = mListX;
const SIMD_AABB_YZ4* PX_RESTRICT listYZ = mListYZ;
const ABP_Index* PX_RESTRICT remap = mInputRemap;
const PxU32 nb = mNb;
PxU32* PX_RESTRICT Remap = mRemap;
SIMD_AABB_X4* PX_RESTRICT BoxListXBuffer = mBoxListXBuffer;
SIMD_AABB_YZ4* PX_RESTRICT BoxListYZBuffer = mBoxListYZBuffer;
PxU32* PX_RESTRICT Counters = mCounters;
SIMD_AABB_X4** PX_RESTRICT BoxListX = mBoxListX;
SIMD_AABB_YZ4** PX_RESTRICT BoxListYZ = mBoxListYZ;
PxU32** PX_RESTRICT RemapBase = mRemapBase;
{
PX_PROFILE_ZONE("ABP_CompleteBoxPruningStartTask - Run", mContextID);
// PT: TODO: revisit allocs
//BoxListXBuffer = reinterpret_cast<SIMD_AABB_X4*>(memoryManager.frameAlloc(sizeof(SIMD_AABB_X4)*(nb+NB_SENTINELS*NB_BUCKETS)));
//BoxListYZBuffer = reinterpret_cast<SIMD_AABB_YZ4*>(memoryManager.frameAlloc(sizeof(SIMD_AABB_YZ4)*nb));
const PxVec3& mergedMin = mBounds.minimum;
const PxVec3& mergedMax = mBounds.maximum;
const float limitY = (mergedMax[1] + mergedMin[1]) * 0.5f;
const float limitZ = (mergedMax[2] + mergedMin[2]) * 0.5f;
for(PxU32 i=0;i<NB_BUCKETS;i++)
Counters[i] = 0;
//Remap = reinterpret_cast<PxU32*>(memoryManager.frameAlloc(sizeof(PxU32)*nb));
// PT: TODO: revisit allocs
//PxU8* Indices = reinterpret_cast<PxU8*>(memoryManager.frameAlloc(sizeof(PxU8)*nb));
PxU8* Indices = reinterpret_cast<PxU8*>(PX_ALLOC(sizeof(PxU8)*nb, "Indices"));
{
PX_PROFILE_ZONE("BoxPruning - ClassifyBoxes", mContextID);
for(PxU32 i=0;i<nb;i++)
{
const PxU8 index = classifyBoxNew(listYZ[i], limitY, limitZ);
Indices[i] = index;
Counters[index]++;
}
}
{
SIMD_AABB_X4* CurrentBoxListXBuffer = BoxListXBuffer;
SIMD_AABB_YZ4* CurrentBoxListYZBuffer = BoxListYZBuffer;
PxU32* CurrentRemap = Remap;
for(PxU32 i=0;i<NB_BUCKETS;i++)
{
const PxU32 Nb = Counters[i];
BoxListX[i] = CurrentBoxListXBuffer;
BoxListYZ[i] = CurrentBoxListYZBuffer;
RemapBase[i] = CurrentRemap;
CurrentBoxListXBuffer += Nb+NB_SENTINELS;
CurrentBoxListYZBuffer += Nb;
CurrentRemap += Nb;
}
PX_ASSERT(CurrentBoxListXBuffer == BoxListXBuffer + nb + NB_SENTINELS*NB_BUCKETS);
PX_ASSERT(CurrentBoxListYZBuffer == BoxListYZBuffer + nb);
PX_ASSERT(CurrentRemap == Remap + nb);
}
for(PxU32 i=0;i<NB_BUCKETS;i++)
Counters[i] = 0;
for(PxU32 i=0;i<nb;i++)
{
const PxU32 SortedIndex = i;
const PxU32 TargetBucket = PxU32(Indices[SortedIndex]);
const PxU32 IndexInTarget = Counters[TargetBucket]++;
SIMD_AABB_X4* TargetBoxListX = BoxListX[TargetBucket];
SIMD_AABB_YZ4* TargetBoxListYZ = BoxListYZ[TargetBucket];
PxU32* TargetRemap = RemapBase[TargetBucket];
TargetRemap[IndexInTarget] = remap[SortedIndex];
TargetBoxListX[IndexInTarget] = listX[SortedIndex];
TargetBoxListYZ[IndexInTarget] = listYZ[SortedIndex];
}
//memoryManager.frameFree(Indices);
PX_FREE(Indices);
for(PxU32 i=0;i<NB_BUCKETS;i++)
{
SIMD_AABB_X4* TargetBoxListX = BoxListX[i];
const PxU32 IndexInTarget = Counters[i];
for(PxU32 j=0;j<NB_SENTINELS;j++)
TargetBoxListX[IndexInTarget+j].initSentinel();
}
}
for(PxU32 i=0;i<8;i++)
{
mTasks[i].mCounter = Counters[i/2];
mTasks[i].mBoxListX = BoxListX[i/2];
mTasks[i].mBoxListYZ = BoxListYZ[i/2];
mTasks[i].mRemap = RemapBase[i/2];
mTasks[i].mType = i&1;
mTasks[i].mCounter4 = Counters[4];
mTasks[i].mBoxListX4 = BoxListX[4];
mTasks[i].mBoxListYZ4 = BoxListYZ[4];
mTasks[i].mRemap4 = RemapBase[4];
mTasks[i].mPairs.mSharedPM = mPairManager;
//mTasks[i].mPairs.mDelayedPairs.reserve(10000);
}
PxU32 i=8;
{
mTasks[i].mCounter = Counters[4];
mTasks[i].mBoxListX = BoxListX[4];
mTasks[i].mBoxListYZ = BoxListYZ[4];
mTasks[i].mRemap = RemapBase[4];
mTasks[i].mType = 0;
mTasks[i].mCounter4 = Counters[4];
mTasks[i].mBoxListX4 = BoxListX[4];
mTasks[i].mBoxListYZ4 = BoxListYZ[4];
mTasks[i].mRemap4 = RemapBase[4];
mTasks[i].mPairs.mSharedPM = mPairManager;
//mTasks[i].mPairs.mDelayedPairs.reserve(10000);
}
for(PxU32 k=0; k<8+1; k++)
{
if(mTasks[k].isThereWorkToDo())
{
mTasks[k].mID = PxU16(k);
mTasks[k].setContinuation(getContinuation());
}
}
for(PxU32 k=0; k<8+1; k++)
{
if(mTasks[k].isThereWorkToDo())
mTasks[k].removeReference();
}
}
void ABP_CompleteBoxPruningStartTask::addDelayedPairs()
{
PX_PROFILE_ZONE("ABP_CompleteBoxPruningStartTask - add delayed pairs", mContextID);
PxU32 nbDelayedPairs = 0;
for(PxU32 k=0; k<9; k++)
nbDelayedPairs += mTasks[k].mPairs.mDelayedPairs.size();
if(nbDelayedPairs)
{
{
PX_PROFILE_ZONE("BroadPhaseABP - resizeForNewPairs", mContextID);
mPairManager->resizeForNewPairs(nbDelayedPairs);
}
for(PxU32 k=0; k<9; k++)
mPairManager->addDelayedPairs(mTasks[k].mPairs.mDelayedPairs);
}
}
// Same as addDelayedPairs, but goes through ABP_PairManager::addDelayedPairs2, which also receives the
// 'createdPairs' output array.
void ABP_CompleteBoxPruningStartTask::addDelayedPairs2(PxArray<BroadPhasePair>& createdPairs)
{
	PX_PROFILE_ZONE("ABP_CompleteBoxPruningStartTask - add delayed pairs", mContextID);

	PxU32 totalDelayed = 0;
	for(PxU32 t=0; t<9; t++)
		totalDelayed += mTasks[t].mPairs.mDelayedPairs.size();

	if(!totalDelayed)
		return;

	{
		PX_PROFILE_ZONE("BroadPhaseABP - resizeForNewPairs", mContextID);
		mPairManager->resizeForNewPairs(totalDelayed);
	}

	for(PxU32 t=0; t<9; t++)
		mPairManager->addDelayedPairs2(createdPairs, mTasks[t].mPairs.mDelayedPairs);
}
#endif
#ifndef USE_ALTERNATIVE_VERSION
// Bucketed complete pruning of the updated boxes.
// With ABP_MT2 and a non-NULL continuation, the work is handed over to 'completeBoxPruningTask' and the
// function returns immediately. Otherwise the boxes are split into NB_BUCKETS buckets around the center of
// 'updatedBounds', each bucket is pruned against itself, and every other bucket is pruned against the last one.
// - memoryManager: provides the temporary frame allocations (released in reverse allocation order).
// - pairManager: receives the overlapping pairs.
// - nb: number of boxes; listX / listYZ / remap are parallel arrays of that size.
static void CompleteBoxPruning_Version16(
#ifdef ABP_MT2
	ABP_CompleteBoxPruningStartTask& completeBoxPruningTask,
#endif
	ABP_MM& memoryManager,
	const PxBounds3& updatedBounds,
	ABP_PairManager* PX_RESTRICT pairManager,
	PxU32 nb,
	const SIMD_AABB_X4* PX_RESTRICT listX,
	const SIMD_AABB_YZ4* PX_RESTRICT listYZ,
	const ABP_Index* PX_RESTRICT remap,
	PxBaseTask* continuation, PxU64 contextID)
{
	PX_UNUSED(contextID);
	PX_UNUSED(continuation);

	if(!nb)
		return;

#ifdef ABP_MT2
	if(continuation)
	{
		// Multi-threaded path: chain start task -> end task -> continuation, then kick both.
		completeBoxPruningTask.setup(updatedBounds, pairManager, nb, listX, listYZ, remap, contextID);

		completeBoxPruningTask.mEndTask.setContinuation(continuation);
		completeBoxPruningTask.setContinuation(&completeBoxPruningTask.mEndTask);

		completeBoxPruningTask.mEndTask.removeReference();
		completeBoxPruningTask.removeReference();
		return;
	}
#endif

	PxU32* remapBuffer;
	SIMD_AABB_X4* xBuffer;
	SIMD_AABB_YZ4* yzBuffer;

	PxU32 counters[NB_BUCKETS];
	SIMD_AABB_X4* bucketX[NB_BUCKETS];
	SIMD_AABB_YZ4* bucketYZ[NB_BUCKETS];
	PxU32* bucketRemap[NB_BUCKETS];

	{
		PX_PROFILE_ZONE("BoxPruning - PrepareData", contextID);

		// PT: TODO: revisit allocs
		// The X buffer has room for NB_SENTINELS extra entries per bucket.
		xBuffer = reinterpret_cast<SIMD_AABB_X4*>(memoryManager.frameAlloc(sizeof(SIMD_AABB_X4)*(nb+NB_SENTINELS*NB_BUCKETS)));
		yzBuffer = reinterpret_cast<SIMD_AABB_YZ4*>(memoryManager.frameAlloc(sizeof(SIMD_AABB_YZ4)*nb));

		// Split planes go through the center of the provided bounds.
		const PxVec3& boundsMin = updatedBounds.minimum;
		const PxVec3& boundsMax = updatedBounds.maximum;
		const float limitY = (boundsMax[1] + boundsMin[1]) * 0.5f;
		const float limitZ = (boundsMax[2] + boundsMin[2]) * 0.5f;

		for(PxU32 b=0; b<NB_BUCKETS; b++)
			counters[b] = 0;

		remapBuffer = reinterpret_cast<PxU32*>(memoryManager.frameAlloc(sizeof(PxU32)*nb));
		PxU8* bucketOfBox = reinterpret_cast<PxU8*>(memoryManager.frameAlloc(sizeof(PxU8)*nb));

		// Pass 1: classify every box and count the population of each bucket.
		{
			PX_PROFILE_ZONE("BoxPruning - ClassifyBoxes", contextID);

			for(PxU32 i=0; i<nb; i++)
			{
				const PxU8 bucket = classifyBoxNew(listYZ[i], limitY, limitZ);
				bucketOfBox[i] = bucket;
				counters[bucket]++;
			}
		}

		// Carve the shared buffers into one contiguous range per bucket.
		{
			SIMD_AABB_X4* nextX = xBuffer;
			SIMD_AABB_YZ4* nextYZ = yzBuffer;
			PxU32* nextRemap = remapBuffer;

			for(PxU32 b=0; b<NB_BUCKETS; b++)
			{
				const PxU32 bucketSize = counters[b];

				bucketX[b] = nextX;
				bucketYZ[b] = nextYZ;
				bucketRemap[b] = nextRemap;

				nextX += bucketSize+NB_SENTINELS;
				nextYZ += bucketSize;
				nextRemap += bucketSize;
			}

			PX_ASSERT(nextX == xBuffer + nb + NB_SENTINELS*NB_BUCKETS);
			PX_ASSERT(nextYZ == yzBuffer + nb);
			PX_ASSERT(nextRemap == remapBuffer + nb);
		}

		// Pass 2: scatter the boxes into their buckets, reusing the counters as write cursors.
		for(PxU32 b=0; b<NB_BUCKETS; b++)
			counters[b] = 0;

		for(PxU32 i=0; i<nb; i++)
		{
			const PxU32 bucket = PxU32(bucketOfBox[i]);
			const PxU32 slot = counters[bucket]++;

			bucketRemap[bucket][slot] = remap[i];
			bucketX[bucket][slot] = listX[i];
			bucketYZ[bucket][slot] = listYZ[i];
		}

		memoryManager.frameFree(bucketOfBox);

		// Terminate every bucket's X list with sentinels.
		for(PxU32 b=0; b<NB_BUCKETS; b++)
		{
			SIMD_AABB_X4* sentinels = bucketX[b] + counters[b];
			for(PxU32 j=0; j<NB_SENTINELS; j++)
				sentinels[j].initSentinel();
		}
	}

	{
		// Each bucket against itself.
		for(PxU32 b=0; b<NB_BUCKETS; b++)
		{
#ifdef RECURSE_LIMIT
			if(counters[b]<RECURSE_LIMIT || counters[b]==nb)
#endif
				doCompleteBoxPruning_Leaf(	pairManager,
											counters[b],
											bucketX[b], bucketYZ[b],
											bucketRemap[b]);
#ifdef RECURSE_LIMIT
			else
				CompleteBoxPruning_Recursive(memoryManager, pairManager,
											counters[b],
											bucketX[b], bucketYZ[b],
											bucketRemap[b]);
#endif
		}

		// Every other bucket against the last bucket.
		const PxU32 lastBucket = NB_BUCKETS-1;
		for(PxU32 b=0; b<NB_BUCKETS-1; b++)
		{
			doBipartiteBoxPruning_Leaf(pairManager,
				counters[b], counters[lastBucket],
				bucketX[b], bucketX[lastBucket], bucketYZ[b], bucketYZ[lastBucket],
				bucketRemap[b], bucketRemap[lastBucket]
				);
		}
	}

	memoryManager.frameFree(remapBuffer);
	memoryManager.frameFree(yzBuffer);
	memoryManager.frameFree(xBuffer);
}
#endif
#endif
#ifdef USE_ALTERNATIVE_VERSION
// PT: experimental version that adds all cross-bucket objects to all regular buckets
static void CompleteBoxPruning_Version16(
ABP_MM& /*memoryManager*/,
const PxBounds3& updatedBounds,
ABP_PairManager* PX_RESTRICT pairManager,
PxU32 nb,
const SIMD_AABB_X4* PX_RESTRICT listX,
const SIMD_AABB_YZ4* PX_RESTRICT listYZ,
const ABP_Index* PX_RESTRICT remap,
const ABPEntry* PX_RESTRICT objects)
{
if(!nb)
return;
const PxVec3& mergedMin = updatedBounds.minimum;
const PxVec3& mergedMax = updatedBounds.maximum;
const float limitY = (mergedMax[1] + mergedMin[1]) * 0.5f;
const float limitZ = (mergedMax[2] + mergedMin[2]) * 0.5f;
PxU32 Counters[NB_BUCKETS];
for(PxU32 i=0;i<NB_BUCKETS;i++)
Counters[i] = 0;
PxU8* Indices = (PxU8*)PX_ALLOC(sizeof(PxU8)*nb, "temp");
for(PxU32 i=0;i<nb;i++)
{
const PxU8 index = classifyBoxNew(listYZ[i], limitY, limitZ);
Indices[i] = index;
Counters[index]++;
}
PxU32 total = 0;
PxU32 Counters2[4];
for(PxU32 i=0;i<4;i++)
{
Counters2[i] = Counters[i] + Counters[4];
total += Counters2[i];
}
// PT: TODO: revisit allocs
SIMD_AABB_X4* BoxListXBuffer = (SIMD_AABB_X4*)PX_ALLOC(sizeof(SIMD_AABB_X4)*(total+NB_SENTINELS*NB_BUCKETS), "temp");
SIMD_AABB_YZ4* BoxListYZBuffer = (SIMD_AABB_YZ4*)PX_ALLOC(sizeof(SIMD_AABB_YZ4)*total, "temp");
PxU32* Remap = (PxU32*)PX_ALLOC(sizeof(PxU32)*total, "temp");
SIMD_AABB_X4* CurrentBoxListXBuffer = BoxListXBuffer;
SIMD_AABB_YZ4* CurrentBoxListYZBuffer = BoxListYZBuffer;
PxU32* CurrentRemap = Remap;
SIMD_AABB_X4* BoxListX[4];
SIMD_AABB_YZ4* BoxListYZ[4];
PxU32* RemapBase[4];
for(PxU32 i=0;i<4;i++)
{
const PxU32 Nb = Counters2[i];
BoxListX[i] = CurrentBoxListXBuffer;
BoxListYZ[i] = CurrentBoxListYZBuffer;
RemapBase[i] = CurrentRemap;
CurrentBoxListXBuffer += Nb+NB_SENTINELS;
CurrentBoxListYZBuffer += Nb;
CurrentRemap += Nb;
}
PX_ASSERT(CurrentBoxListXBuffer == BoxListXBuffer + total + NB_SENTINELS*NB_BUCKETS);
PX_ASSERT(CurrentBoxListYZBuffer == BoxListYZBuffer + total);
PX_ASSERT(CurrentRemap == Remap + total);
for(PxU32 i=0;i<4;i++)
Counters2[i] = 0;
for(PxU32 i=0;i<nb;i++)
{
const PxU32 SortedIndex = i;
const PxU32 TargetBucket = PxU32(Indices[SortedIndex]);
if(TargetBucket==4)
{
for(PxU32 j=0;j<4;j++)
{
const PxU32 IndexInTarget = Counters2[j]++;
SIMD_AABB_X4* TargetBoxListX = BoxListX[j];
SIMD_AABB_YZ4* TargetBoxListYZ = BoxListYZ[j];
PxU32* TargetRemap = RemapBase[j];
TargetRemap[IndexInTarget] = remap[SortedIndex];
TargetBoxListX[IndexInTarget] = listX[SortedIndex];
TargetBoxListYZ[IndexInTarget] = listYZ[SortedIndex];
}
}
else
{
const PxU32 IndexInTarget = Counters2[TargetBucket]++;
SIMD_AABB_X4* TargetBoxListX = BoxListX[TargetBucket];
SIMD_AABB_YZ4* TargetBoxListYZ = BoxListYZ[TargetBucket];
PxU32* TargetRemap = RemapBase[TargetBucket];
TargetRemap[IndexInTarget] = remap[SortedIndex];
TargetBoxListX[IndexInTarget] = listX[SortedIndex];
TargetBoxListYZ[IndexInTarget] = listYZ[SortedIndex];
}
}
PX_FREE(Indices);
for(PxU32 i=0;i<4;i++)
{
SIMD_AABB_X4* TargetBoxListX = BoxListX[i];
const PxU32 IndexInTarget = Counters2[i];
for(PxU32 j=0;j<NB_SENTINELS;j++)
TargetBoxListX[IndexInTarget+j].initSentinel();
}
{
for(PxU32 i=0;i<4;i++)
{
#ifdef RECURSE_LIMIT
if(Counters2[i]<RECURSE_LIMIT || Counters2[i]==nb)
#endif
doCompleteBoxPruning_Leaf( pairManager,
Counters2[i],
BoxListX[i], BoxListYZ[i],
RemapBase[i],
objects);
#ifdef RECURSE_LIMIT
else
CompleteBoxPruning_Recursive( pairManager,
Counters2[i],
BoxListX[i], BoxListYZ[i],
RemapBase[i],
objects);
#endif
}
}
PX_FREE(Remap);
PX_FREE(BoxListYZBuffer);
PX_FREE(BoxListXBuffer);
}
#endif
// Finds the overlaps that involve updated boxes of one manager ('mDBM'):
//  1) updated boxes vs non-updated ("sleeping") boxes, as a bipartite pruning;
//  2) updated boxes vs updated boxes, as a complete pruning (bucketed when USE_ABP_BUCKETS is defined and
//     the number of updated boxes exceeds it).
// Returns immediately when there are no updated boxes.
// With ABP_MT2 and a non-NULL 'continuation', the work is not executed here: it is stored in
// bipTask0 (step 1) / bipTask1 or completeBoxPruningTask (step 2) and scheduled with 'continuation' as
// continuation. Otherwise everything runs inline and pairs go straight to 'pairManager'.
static void doCompleteBoxPruning_(
#ifdef ABP_MT2
ABP_CompleteBoxPruningStartTask& completeBoxPruningTask,
ABP_CompleteBoxPruningTask& bipTask0,
ABP_CompleteBoxPruningTask& bipTask1,
#endif
ABP_MM& memoryManager, ABP_PairManager* PX_RESTRICT pairManager, const DynamicManager& mDBM, PxBaseTask* continuation, PxU64 contextID)
{
const PxU32 nbUpdated = mDBM.getNbUpdatedBoxes();
if(!nbUpdated)
return;
const PxU32 nbNonUpdated = mDBM.getNbNonUpdatedBoxes();
const DynamicBoxes& updatedBoxes = mDBM.getUpdatedBoxes();
const SIMD_AABB_X4* PX_RESTRICT updatedDynamicBoxes_X = updatedBoxes.getBoxes_X();
const SIMD_AABB_YZ4* PX_RESTRICT updatedDynamicBoxes_YZ = updatedBoxes.getBoxes_YZ();
// PT: find sleeping-dynamics-vs-active-dynamics overlaps
if(nbNonUpdated)
{
#ifdef ABP_MT2
if(continuation)
{
// Deferred version: primary list = updated boxes, secondary ("4") list = sleeping boxes, type 1 = bipartite.
bipTask0.mCounter = nbUpdated;
bipTask0.mBoxListX = updatedBoxes.getBoxes_X();
bipTask0.mBoxListYZ = updatedBoxes.getBoxes_YZ();
bipTask0.mRemap = mDBM.getRemap_Updated();
bipTask0.mType = 1;
bipTask0.mCounter4 = nbNonUpdated;
bipTask0.mBoxListX4 = mDBM.getSleepingBoxes().getBoxes_X();
bipTask0.mBoxListYZ4 = mDBM.getSleepingBoxes().getBoxes_YZ();
bipTask0.mRemap4 = mDBM.getRemap_Sleeping();
bipTask0.mPairs.mSharedPM = pairManager;
//bipTask0.mPairs.mDelayedPairs.reserve(10000);
if(bipTask0.isThereWorkToDo())
{
bipTask0.mID = 0;
bipTask0.setContinuation(continuation);
bipTask0.removeReference();
}
}
else
#endif
// Inline version. Without ABP_MT2 this is the only statement of the enclosing 'if'; with ABP_MT2 it is
// the 'else' branch of the continuation test above.
doBipartiteBoxPruning_Leaf( pairManager, nbUpdated, nbNonUpdated,
updatedBoxes, mDBM.getSleepingBoxes(),
mDBM.getRemap_Updated(), mDBM.getRemap_Sleeping());
}
///////
// PT: find active-dynamics-vs-active-dynamics overlaps
if(1)
{
// memoryManager is only referenced by the USE_ABP_BUCKETS path below.
PX_UNUSED(memoryManager);
#ifdef USE_ABP_BUCKETS
// Large sets go through the bucketed version, which handles the ABP_MT2 scheduling itself.
if(nbUpdated>USE_ABP_BUCKETS)
CompleteBoxPruning_Version16(
#ifdef ABP_MT2
completeBoxPruningTask,
#endif
memoryManager, mDBM.getUpdatedBounds(), pairManager, nbUpdated,
updatedDynamicBoxes_X, updatedDynamicBoxes_YZ,
mDBM.getRemap_Updated(), continuation, contextID);
else
#endif
{
#ifdef ABP_MT2
if(continuation)
{
// Deferred version: type 0 = complete pruning of the primary list; the secondary list is not set here.
bipTask1.mCounter = nbUpdated;
bipTask1.mBoxListX = updatedDynamicBoxes_X;
bipTask1.mBoxListYZ = updatedDynamicBoxes_YZ;
bipTask1.mRemap = mDBM.getRemap_Updated();
bipTask1.mType = 0;
bipTask1.mPairs.mSharedPM = pairManager;
//bipTask1.mPairs.mDelayedPairs.reserve(10000);
if(bipTask1.isThereWorkToDo())
{
bipTask1.mID = 0;
bipTask1.setContinuation(continuation);
bipTask1.removeReference();
}
}
else
#endif
// Inline version of the complete pruning.
doCompleteBoxPruning_Leaf( pairManager, nbUpdated,
updatedDynamicBoxes_X, updatedDynamicBoxes_YZ,
mDBM.getRemap_Updated());
}
}
}
// Prepares the box managers for the overlap search.
// Nothing happens when none of the three managers reports work. Otherwise the static manager is prepared
// only if it has work, the dynamic and kinematic managers are always prepared, and mRS is reset afterwards.
void ABP::Region_prepareOverlaps()
{
	PX_PROFILE_ZONE("ABP - Region_prepareOverlaps", mContextID);

	const bool anyWork =	mDBM.isThereWorkToDo()
						||	mKBM.isThereWorkToDo()
						||	mSBM.isThereWorkToDo();
	if(!anyWork)
		return;

	if(mSBM.isThereWorkToDo())
	{
		mSBM.prepareData(mRS, mShared.mABP_Objects, mShared.mABP_Objects_Capacity, mMM, mContextID);
	}

	mDBM.prepareData(mRS, mShared.mABP_Objects, mShared.mABP_Objects_Capacity, mMM, mContextID);
	mKBM.prepareData(mRS, mShared.mABP_Objects, mShared.mABP_Objects_Capacity, mMM, mContextID);

	mRS.reset();
}
// Finds static-vs-dynamic and dynamic-vs-dynamic overlaps
// - doComplete: run the pruning of 'mDBM' against itself (see doCompleteBoxPruning_).
// - doBipartite: run the prunings of 'mDBM' against 'mSBM':
//     updated mDBM boxes  vs updated mSBM boxes,
//     updated mDBM boxes  vs sleeping mSBM boxes,
//     sleeping mDBM boxes vs updated mSBM boxes.
//   Sleeping-vs-sleeping is never tested.
// With ABP_MT2 and a non-NULL 'continuation', each pruning is stored in one of the bipTask objects and
// scheduled with 'continuation' as continuation instead of being executed here: bipTask0..2 hold the three
// bipartite prunings listed above, while bipTask3/bipTask4 and completeBoxPruningTask are handed to
// doCompleteBoxPruning_. Otherwise everything runs inline and pairs go straight to 'pairManager'.
static void findAllOverlaps(
#ifdef ABP_MT2
ABP_CompleteBoxPruningStartTask& completeBoxPruningTask,
ABP_CompleteBoxPruningTask& bipTask0,
ABP_CompleteBoxPruningTask& bipTask1,
ABP_CompleteBoxPruningTask& bipTask2,
ABP_CompleteBoxPruningTask& bipTask3,
ABP_CompleteBoxPruningTask& bipTask4,
#endif
ABP_MM& memoryManager, ABP_PairManager& pairManager, const StaticManager& mSBM, const DynamicManager& mDBM, bool doComplete, bool doBipartite, PxBaseTask* continuation, PxU64 contextID)
{
const PxU32 nbUpdatedBoxesDynamic = mDBM.getNbUpdatedBoxes();
// PT: find dynamics-vs-dynamics overlaps
if(doComplete)
doCompleteBoxPruning_(
#ifdef ABP_MT2
completeBoxPruningTask,
bipTask3,
bipTask4,
#endif
memoryManager, &pairManager, mDBM, continuation, contextID);
// PT: find dynamics-vs-statics overlaps
if(doBipartite)
{
const PxU32 nbUpdatedBoxesStatic = mSBM.getNbUpdatedBoxes();
const PxU32 nbNonUpdatedBoxesStatic = mSBM.getNbNonUpdatedBoxes();
const PxU32 nbNonUpdatedBoxesDynamic = mDBM.getNbNonUpdatedBoxes();
// PT: in previous versions we did active-dynamics-vs-all-statics here.
if(nbUpdatedBoxesDynamic)
{
if(nbUpdatedBoxesStatic)
{
// PT: active static vs active dynamic
#ifdef ABP_MT2
if(continuation)
{
// Deferred version: primary list = updated mDBM boxes, secondary ("4") list = updated mSBM boxes.
bipTask0.mCounter = nbUpdatedBoxesDynamic;
bipTask0.mBoxListX = mDBM.getUpdatedBoxes().getBoxes_X();
bipTask0.mBoxListYZ = mDBM.getUpdatedBoxes().getBoxes_YZ();
bipTask0.mRemap = mDBM.getRemap_Updated();
bipTask0.mType = 1;
bipTask0.mCounter4 = nbUpdatedBoxesStatic;
bipTask0.mBoxListX4 = mSBM.getUpdatedBoxes().getBoxes_X();
bipTask0.mBoxListYZ4 = mSBM.getUpdatedBoxes().getBoxes_YZ();
bipTask0.mRemap4 = mSBM.getRemap_Updated();
bipTask0.mPairs.mSharedPM = &pairManager;
//bipTask0.mPairs.mDelayedPairs.reserve(10000);
if(bipTask0.isThereWorkToDo())
{
bipTask0.mID = 0;
bipTask0.setContinuation(continuation);
bipTask0.removeReference();
}
}
else
#endif
// Inline version (the 'else' branch of the continuation test when ABP_MT2 is defined).
doBipartiteBoxPruning_Leaf( &pairManager,
nbUpdatedBoxesDynamic, nbUpdatedBoxesStatic,
mDBM.getUpdatedBoxes(), mSBM.getUpdatedBoxes(),
mDBM.getRemap_Updated(), mSBM.getRemap_Updated());
}
if(nbNonUpdatedBoxesStatic)
{
// PT: sleeping static vs active dynamic
#ifdef ABP_MT2
if(continuation)
{
// Deferred version: primary list = updated mDBM boxes, secondary ("4") list = sleeping mSBM boxes.
bipTask1.mCounter = nbUpdatedBoxesDynamic;
bipTask1.mBoxListX = mDBM.getUpdatedBoxes().getBoxes_X();
bipTask1.mBoxListYZ = mDBM.getUpdatedBoxes().getBoxes_YZ();
bipTask1.mRemap = mDBM.getRemap_Updated();
bipTask1.mType = 1;
bipTask1.mCounter4 = nbNonUpdatedBoxesStatic;
bipTask1.mBoxListX4 = mSBM.getSleepingBoxes().getBoxes_X();
bipTask1.mBoxListYZ4 = mSBM.getSleepingBoxes().getBoxes_YZ();
bipTask1.mRemap4 = mSBM.getRemap_Sleeping();
bipTask1.mPairs.mSharedPM = &pairManager;
//bipTask1.mPairs.mDelayedPairs.reserve(10000);
if(bipTask1.isThereWorkToDo())
{
bipTask1.mID = 0;
bipTask1.setContinuation(continuation);
bipTask1.removeReference();
}
}
else
#endif
// Inline version.
doBipartiteBoxPruning_Leaf( &pairManager,
nbUpdatedBoxesDynamic, nbNonUpdatedBoxesStatic,
mDBM.getUpdatedBoxes(), mSBM.getSleepingBoxes(),
mDBM.getRemap_Updated(), mSBM.getRemap_Sleeping());
}
}
if(nbUpdatedBoxesStatic && nbNonUpdatedBoxesDynamic)
{
// PT: active static vs sleeping dynamic
#ifdef ABP_MT2
if(continuation)
{
// Deferred version: primary list = sleeping mDBM boxes, secondary ("4") list = updated mSBM boxes.
bipTask2.mCounter = nbNonUpdatedBoxesDynamic;
bipTask2.mBoxListX = mDBM.getSleepingBoxes().getBoxes_X();
bipTask2.mBoxListYZ = mDBM.getSleepingBoxes().getBoxes_YZ();
bipTask2.mRemap = mDBM.getRemap_Sleeping();
bipTask2.mType = 1;
bipTask2.mCounter4 = nbUpdatedBoxesStatic;
bipTask2.mBoxListX4 = mSBM.getUpdatedBoxes().getBoxes_X();
bipTask2.mBoxListYZ4 = mSBM.getUpdatedBoxes().getBoxes_YZ();
bipTask2.mRemap4 = mSBM.getRemap_Updated();
bipTask2.mPairs.mSharedPM = &pairManager;
//bipTask2.mPairs.mDelayedPairs.reserve(10000);
if(bipTask2.isThereWorkToDo())
{
bipTask2.mID = 0;
bipTask2.setContinuation(continuation);
bipTask2.removeReference();
}
}
else
#endif
// Inline version.
doBipartiteBoxPruning_Leaf( &pairManager,
nbNonUpdatedBoxesDynamic, nbUpdatedBoxesStatic,
mDBM.getSleepingBoxes(), mSBM.getUpdatedBoxes(),
mDBM.getRemap_Sleeping(), mSBM.getRemap_Updated());
}
}
}
///////////////////////////////////////////////////////////////////////////
// Creates one box manager per object type (static, dynamic, kinematic) and records the profiler context ID.
// With ABP_MT2 the context ID is also propagated to every task object owned by the ABP.
ABP::ABP(PxU64 contextID) :
	mSBM		(FilterType::STATIC),
	mDBM		(FilterType::DYNAMIC),
	mKBM		(FilterType::KINEMATIC),
	mContextID	(contextID)
#ifdef ABP_MT2
	,mTask0		(ABP_TASK_0)
	,mTask1		(ABP_TASK_1)
#endif
{
#ifdef ABP_MT2
	// Top-level tasks
	mTask0.setContextId(mContextID);
	mTask1.setContextId(mContextID);

	// Complete-pruning start tasks
	mCompleteBoxPruningTask0.setContextId(mContextID);
	mCompleteBoxPruningTask1.setContextId(mContextID);

	// The nine sub-tasks of each start task
	for(PxU32 subTask=0; subTask<9; subTask++)
	{
		mCompleteBoxPruningTask0.mTasks[subTask].setContextId(mContextID);
		mCompleteBoxPruningTask1.mTasks[subTask].setContextId(mContextID);
	}

	// Bipartite tasks
	for(PxU32 bipTask=0; bipTask<NB_BIP_TASKS; bipTask++)
	{
		mBipTasks[bipTask].setContextId(mContextID);
	}
#endif
}
// Destruction goes through reset().
ABP::~ABP()
{
	this->reset();
}
// Calls empty() on the bit array that tracks removed objects.
// NOTE(review): presumably this clears/releases the bit array rather than querying it - confirm against
// the BitArray class.
void ABP::freeBuffers()
{
	mShared.mRemovedObjects.empty();
}
// Preallocates storage.
// - nbObjects: when non-zero, the current object array is deleted and replaced by a new array of that
//   capacity (existing entries are not copied).
// - maxNbOverlaps: forwarded to the pair manager's reserveMemory().
void ABP::preallocate(PxU32 nbObjects, PxU32 maxNbOverlaps)
{
	if(nbObjects)
	{
		PX_DELETE_ARRAY(mShared.mABP_Objects);

		mShared.mABP_Objects = PX_NEW(ABP_Object)[nbObjects];
		mShared.mABP_Objects_Capacity = nbObjects;
#if PX_DEBUG
		// Debug builds start with every object flagged as not updated.
		for(PxU32 objectIndex=0; objectIndex<nbObjects; objectIndex++)
			mShared.mABP_Objects[objectIndex].mUpdated = false;
#endif
	}

	// PT: TODO: here we should preallocate the box arrays but we don't know how many of them will be static / dynamic...
	mPairManager.reserveMemory(maxNbOverlaps);
}
// Adds a batch of handles to the static box manager.
// \param userID_	array of handles to add
// \param nb		number of handles in the array
// \param maxID		value passed to the shared data's checkResize() before the handles are added
// Unlike the dynamic / kinematic variants, no shared data is passed to addObjects() (NULL).
void ABP::addStaticObjects(const BpHandle* userID_, PxU32 nb, PxU32 maxID)
{
	// Resize first, add second.
	mShared.checkResize(maxID);

	mSBM.addObjects(userID_, nb, NULL);
}
// Adds a batch of handles to the dynamic box manager.
// \param userID_	array of handles to add
// \param nb		number of handles in the array
// \param maxID		value passed to checkResize() on both the shared data and the "updated objects" bit array
void ABP::addDynamicObjects(const BpHandle* userID_, PxU32 nb, PxU32 maxID)
{
	// Resize the shared structures before handing the batch to the manager.
	mShared.checkResize(maxID);
	mShared.mUpdatedObjects.checkResize(maxID);

	mDBM.addObjects(userID_, nb, &mShared);
}
// Adds a batch of handles to the kinematic box manager.
// \param userID_	array of handles to add
// \param nb		number of handles in the array
// \param maxID		value passed to checkResize() on both the shared data and the "updated objects" bit array
void ABP::addKinematicObjects(const BpHandle* userID_, PxU32 nb, PxU32 maxID)
{
	// Resize the shared structures before handing the batch to the manager.
	mShared.checkResize(maxID);
	mShared.mUpdatedObjects.checkResize(maxID);

	mKBM.addObjects(userID_, nb, &mShared);
}
// Removes one object from the box manager matching its filter type.
// The handle is flagged in both the "updated" and the "removed" bit arrays, the object is removed
// from its manager, and its index is invalidated afterwards.
// \param userID	handle of the object to remove; asserted to be below the object array capacity
void ABP::removeObject(BpHandle userID)
{
	mShared.mUpdatedObjects.setBitChecked(userID);
	mShared.mRemovedObjects.setBitChecked(userID);

	PX_ASSERT(userID<mShared.mABP_Objects_Capacity);
	ABPEntry& entry = mShared.mABP_Objects[userID];

	// PT: TODO better
	// Anything that is neither static nor kinematic is handled by the dynamic manager.
	const FilterType::Enum entryType = entry.getType();
	BoxManager* owner = &mDBM;
	if(entryType==FilterType::STATIC)
		owner = &mSBM;
	else if(entryType==FilterType::KINEMATIC)
		owner = &mKBM;

	owner->removeObject(entry, userID);

	entry.invalidateIndex();
#if PX_DEBUG
	entry.mUpdated = false;
#endif
}
// Notifies the box manager matching the object's filter type that the object has been updated.
// The handle is also flagged in the "updated" bit array.
// \param userID	handle of the updated object; asserted to be below the object array capacity
void ABP::updateObject(BpHandle userID)
{
	mShared.mUpdatedObjects.setBitChecked(userID);

	PX_ASSERT(userID<mShared.mABP_Objects_Capacity);
	ABPEntry& entry = mShared.mABP_Objects[userID];

	// PT: TODO better
	// Anything that is neither static nor kinematic is handled by the dynamic manager.
	const FilterType::Enum entryType = entry.getType();
	BoxManager* owner = &mDBM;
	if(entryType==FilterType::STATIC)
		owner = &mSBM;
	else if(entryType==FilterType::KINEMATIC)
		owner = &mKBM;

	owner->updateObject(entry, userID);
}
// PT: TODO: replace bits with timestamps?
// Walks all active pairs and produces the created / deleted pair lists for this frame.
// \param createdPairs	receives one entry per pair flagged as new
// \param deletedPairs	receives one entry per lost pair, unless one of its objects has been removed
// \param updated		bit array of objects updated this frame
// \param removed		bit array of objects removed this frame
// Pair flags (new / updated) are cleared as pairs are visited, lost pairs are removed from the
// structure, and shrinkMemory() is called once the traversal is over.
void ABP_PairManager::computeCreatedDeletedPairs(PxArray<BroadPhasePair>& createdPairs, PxArray<BroadPhasePair>& deletedPairs, const BitArray& updated, const BitArray& removed)
{
	// PT: TODO: MT?
	PxU32 remaining = mNbActivePairs;

	// The cursor only advances when the current pair is kept: removePair() is given the cursor
	// and the active count shrinks instead, so the same slot is examined again.
	for(PxU32 cursor=0; cursor<remaining; )
	{
		InternalPair& pair = mActivePairs[cursor];

		if(pair.isNew())
		{
			// New pair: per the original author's notes, the flag is set by 'addPair' when the pair
			// did not exist yet, and group-based filtering has already been done there.
			const PxU32 id0 = pair.getId0();
			const PxU32 id1 = pair.getId1();
			PX_ASSERT(id0!=INVALID_ID);
			PX_ASSERT(id1!=INVALID_ID);

			BroadPhasePair* created = Cm::reserveContainerMemory(createdPairs, 1);
			created->mVolA = id0;
			created->mVolB = id1;

			// PT: TODO: replace this with bitmaps?
			pair.clearNew();
			pair.clearUpdated();
			cursor++;
			continue;
		}

		if(pair.isUpdated())
		{
			// Persistent pair: it already existed and was found again this frame. Just reset the flag.
			pair.clearUpdated();
			cursor++;
			continue;
		}

		// Neither new nor updated: candidate lost pair. Per the original author's notes, a pair of
		// sleeping objects also ends up here because it does not generate an 'addPair' call; such a
		// pair must be kept. Only pairs involving an object updated this frame can really be lost.
		const PxU32 id0 = pair.getId0();
		const PxU32 id1 = pair.getId1();
		PX_ASSERT(id0!=INVALID_ID);
		PX_ASSERT(id1!=INVALID_ID);

		const bool involvesUpdatedObject = updated.isSetChecked(id0) || updated.isSetChecked(id1);
		if(!involvesUpdatedObject)
		{
			// Sleeping pair: keep it and move on.
			cursor++;
			continue;
		}

		// PT: by design (for better or worse) pairs are not reported to the client when one of the
		// involved objects has been deleted. The pair must still be removed from the structure though.
		const bool involvesRemovedObject = removed.isSetChecked(id0) || removed.isSetChecked(id1);
		if(!involvesRemovedObject)
		{
			// PT: group-based filtering here would be useless, the pair should not have been
			// added in the first place.
			BroadPhasePair* lost = Cm::reserveContainerMemory(deletedPairs, 1);
			lost->mVolA = id0;
			lost->mVolB = id1;
		}

		removePair(id0, id1, hash(id0, id1) & mMask, cursor);
		remaining--;
	}

	shrinkMemory();
}
// Runs the overlap queries between the three box managers.
// \param continuation	task forwarded to every findAllOverlaps() call
// \param groups		filter groups, stored in the pair manager
// \param lut			filtering lookup table, stored in the pair manager and read here to decide
//						whether static-vs-kinematic and kinematic-vs-kinematic tests are wanted
// Region_prepareOverlaps() is called here only when gPrepareOverlapsFlag is not set.
// NOTE(review): the two boolean arguments of findAllOverlaps() presumably enable the "complete" and
// the "bipartite" pass respectively - confirm against its definition.
void ABP::findOverlaps(PxBaseTask* continuation, const Bp::FilterGroup::Enum* PX_RESTRICT groups, const bool* PX_RESTRICT lut)
{
	PX_PROFILE_ZONE("ABP - findOverlaps", mContextID);

	mPairManager.mGroups = groups;
	mPairManager.mLUT = lut;

	if(!gPrepareOverlapsFlag)
		Region_prepareOverlaps();

	// Both flags come from the kinematic row of the lookup table.
	const bool staticVsKinematic	= lut[Bp::FilterType::KINEMATIC*Bp::FilterType::COUNT + Bp::FilterType::STATIC];
	const bool kinematicVsKinematic	= lut[Bp::FilterType::KINEMATIC*Bp::FilterType::COUNT + Bp::FilterType::KINEMATIC];

	// Static-vs-dynamic (bipartite) and dynamic-vs-dynamic (complete)
	findAllOverlaps(
#ifdef ABP_MT2
		mCompleteBoxPruningTask0,
		mBipTasks[0],
		mBipTasks[1],
		mBipTasks[2],
		mBipTasks[3],
		mBipTasks[4],
#endif
		mMM, mPairManager, mSBM, mDBM, true, true, continuation, mContextID);

	// Static-vs-kinematics (bipartite) and kinematics-vs-kinematics (complete)
	findAllOverlaps(
#ifdef ABP_MT2
		mCompleteBoxPruningTask1,
		mBipTasks[5],
		mBipTasks[6],
		mBipTasks[7],
		mBipTasks[8],
		mBipTasks[9],
#endif
		mMM, mPairManager, mSBM, mKBM, kinematicVsKinematic, staticVsKinematic, continuation, mContextID);

	// Kinematics-vs-dynamics. The first branch is the one compiled in; the second is kept as the
	// equivalent sequence of direct leaf calls.
	if(1)
	{
		findAllOverlaps(
#ifdef ABP_MT2
			mCompleteBoxPruningTask1,
			mBipTasks[10],
			mBipTasks[11],
			mBipTasks[12],
			mBipTasks[13],
			mBipTasks[14],
#endif
			mMM, mPairManager, mKBM, mDBM, false, true, continuation, mContextID);
	}
	else
	{
		const PxU32 nbActiveDynamics		= mDBM.getNbUpdatedBoxes();
		const PxU32 nbInactiveDynamics		= mDBM.getNbNonUpdatedBoxes();
		const PxU32 nbActiveKinematics		= mKBM.getNbUpdatedBoxes();
		const PxU32 nbInactiveKinematics	= mKBM.getNbNonUpdatedBoxes();

		if(nbActiveDynamics)
		{
			// Active dynamics vs active kinematics
			if(nbActiveKinematics)
			{
				doBipartiteBoxPruning_Leaf(	&mPairManager,
											nbActiveDynamics, nbActiveKinematics,
											mDBM.getUpdatedBoxes(), mKBM.getUpdatedBoxes(),
											mDBM.getRemap_Updated(), mKBM.getRemap_Updated());
			}

			// Active dynamics vs inactive kinematics
			if(nbInactiveKinematics)
			{
				doBipartiteBoxPruning_Leaf(	&mPairManager,
											nbActiveDynamics, nbInactiveKinematics,
											mDBM.getUpdatedBoxes(), mKBM.getSleepingBoxes(),
											mDBM.getRemap_Updated(), mKBM.getRemap_Sleeping());
			}
		}

		// Inactive dynamics vs active kinematics
		if(nbActiveKinematics && nbInactiveDynamics)
		{
			doBipartiteBoxPruning_Leaf(	&mPairManager,
										nbInactiveDynamics, nbActiveKinematics,
										mDBM.getSleepingBoxes(), mKBM.getUpdatedBoxes(),
										mDBM.getRemap_Sleeping(), mKBM.getRemap_Updated());
		}
	}
}
// Generates the created / deleted pair lists, then clears the "updated objects" bit array.
// \param createdPairs	receives the pairs created this frame
// \param deletedPairs	receives the pairs lost this frame
// \return the pair manager's number of active pairs, read after the lists have been computed
PxU32 ABP::finalize(PxArray<BroadPhasePair>& createdPairs, PxArray<BroadPhasePair>& deletedPairs)
{
	PX_PROFILE_ZONE("ABP - finalize", mContextID);

	{
		PX_PROFILE_ZONE("computeCreatedDeletedPairs", mContextID);
		mPairManager.computeCreatedDeletedPairs(createdPairs, deletedPairs, mShared.mUpdatedObjects, mShared.mRemovedObjects);
	}

	// The "updated" bits have been consumed above; start the next frame from a clean slate.
	mShared.mUpdatedObjects.clearAll();

	const PxU32 nbActivePairs = mPairManager.mNbActivePairs;
	return nbActivePairs;
}
#ifdef ABP_MT2
void ABP::addDelayedPairs()
{
PX_PROFILE_ZONE("ABP - addDelayedPairs", mContextID);
mCompleteBoxPruningTask0.addDelayedPairs();
mCompleteBoxPruningTask1.addDelayedPairs();
PxU32 nbDelayedPairs = 0;
for(PxU32 k=0; k<NB_BIP_TASKS; k++)
nbDelayedPairs += mBipTasks[k].mPairs.mDelayedPairs.size();
if(nbDelayedPairs)
{
{
PX_PROFILE_ZONE("ABP - resizeForNewPairs", mContextID);
mPairManager.resizeForNewPairs(nbDelayedPairs);
}
for(PxU32 k=0; k<NB_BIP_TASKS; k++)
mPairManager.addDelayedPairs(mBipTasks[k].mPairs.mDelayedPairs);
}
}
// Variant of addDelayedPairs() that also receives the created-pairs array.
// \param createdPairs	array forwarded to every addDelayedPairs2() call made here
// The two complete-box-pruning tasks flush first. The delayed pairs of all bipartite tasks are
// then counted, the pair manager is resized once for that total, and each task's delayed pairs
// are added in task order.
// NOTE(review): the profile zone below carries the same name as the one in addDelayedPairs().
void ABP::addDelayedPairs2(PxArray<BroadPhasePair>& createdPairs)
{
	PX_PROFILE_ZONE("ABP - addDelayedPairs", mContextID);

	mCompleteBoxPruningTask0.addDelayedPairs2(createdPairs);
	mCompleteBoxPruningTask1.addDelayedPairs2(createdPairs);

	PxU32 totalDelayed = 0;
	for(PxU32 task=0; task<NB_BIP_TASKS; task++)
	{
		totalDelayed += mBipTasks[task].mPairs.mDelayedPairs.size();
	}

	// Nothing was delayed by the bipartite tasks: no resize, no insertion.
	if(!totalDelayed)
		return;

	{
		PX_PROFILE_ZONE("ABP - resizeForNewPairs", mContextID);
		mPairManager.resizeForNewPairs(totalDelayed);
	}

	for(PxU32 task=0; task<NB_BIP_TASKS; task++)
	{
		mPairManager.addDelayedPairs2(createdPairs, mBipTasks[task].mPairs.mDelayedPairs);
	}
}
#endif
// Returns the structure to an empty state: resets the three box managers, deletes the object
// array, purges the pair manager and empties both bit arrays.
void ABP::reset()
{
	// Box managers.
	mSBM.reset();
	mDBM.reset();
	mKBM.reset();

	// Object array: delete it and record that no capacity is left.
	PX_DELETE_ARRAY(mShared.mABP_Objects);
	mShared.mABP_Objects_Capacity = 0;

	// Pairs.
	mPairManager.purge();

	// Per-frame bit arrays.
	mShared.mUpdatedObjects.empty();
	mShared.mRemovedObjects.empty();
}
// PT: TODO: is it really ok to use "transient" data in this function?
// Origin shift hook. Intentionally does nothing: none of the three parameters is used.
void ABP::shiftOrigin(const PxVec3& shift, const PxBounds3* /*boundsArray*/, const PxReal* /*contactDistances*/)
{
	// PT: the bounds were pre-shifted before calling this function, so 'shift' is not needed here.
	// PT: the AABB manager marks all objects as updated when we shift, so extra work here may not be necessary.
	PX_UNUSED(shift);
}
void ABP::setTransientData(const PxBounds3* bounds, const PxReal* contactDistance)
{
mSBM.setSourceData(bounds, contactDistance);
mDBM.setSourceData(bounds, contactDistance);
mKBM.setSourceData(bounds, contactDistance);
}
///////////////////////////////////////////////////////////////////////////////
}
// Below is the PhysX wrapper = link between AABBManager and ABP
using namespace internalABP;
#define DEFAULT_CREATED_DELETED_PAIRS_CAPACITY 1024
// Creates the ABP instance and pre-sizes it.
// \param maxNbBroadPhaseOverlaps	forwarded to ABP::preallocate() as the expected number of overlaps
// \param maxNbStaticShapes			added to maxNbDynamicShapes to get the number of preallocated objects
// \param maxNbDynamicShapes		see above
// \param contextID					context identifier, stored here and passed to the ABP instance
// \param enableMT					stored in mEnableMT
// All per-update state (counts, handle arrays, groups, filter) starts zeroed / NULL.
BroadPhaseABP::BroadPhaseABP(	PxU32 maxNbBroadPhaseOverlaps,
								PxU32 maxNbStaticShapes,
								PxU32 maxNbDynamicShapes,
								PxU64 contextID,
								bool enableMT) :
	mNbAdded		(0),
	mNbUpdated		(0),
	mNbRemoved		(0),
	mCreatedHandles	(NULL),
	mUpdatedHandles	(NULL),
	mRemovedHandles	(NULL),
	mGroups			(NULL),
	mFilter			(NULL),
	mContextID		(contextID),
	mEnableMT		(enableMT)
{
	mABP = PX_NEW(ABP)(contextID);

	// Statics and dynamics share one object array, so preallocate for their sum.
	mABP->preallocate(maxNbStaticShapes + maxNbDynamicShapes, maxNbBroadPhaseOverlaps);

	// Start both result arrays at the default capacity.
	mCreated.reserve(DEFAULT_CREATED_DELETED_PAIRS_CAPACITY);
	mDeleted.reserve(DEFAULT_CREATED_DELETED_PAIRS_CAPACITY);
}
// Destructor: deletes the ABP instance created in the constructor.
BroadPhaseABP::~BroadPhaseABP()
{
	PX_DELETE(this->mABP);
}
// Entry point of the broadphase update.
// \param scratchAllocator	must be non-NULL (checked); stored in the ABP memory manager
// \param updateData		source of the bounds, contact distances, groups, filter and handle arrays
// \param continuation		optional task; ignored (set to NULL) when multithreading is disabled
// After the setup step, the work either goes through the two internal tasks (ABP_MT2 builds with a
// continuation) or runs inline: remove / add / update objects, find overlaps, then finalize.
void BroadPhaseABP::update(PxcScratchAllocator* scratchAllocator, const BroadPhaseUpdateData& updateData, PxBaseTask* continuation)
{
	PX_PROFILE_ZONE("BroadPhaseABP - update", mContextID);

	PX_CHECK_AND_RETURN(scratchAllocator, "BroadPhaseABP::update - scratchAllocator must be non-NULL \n");

	{
		PX_PROFILE_ZONE("BroadPhaseABP - setup", mContextID);

		mABP->mMM.mScratchAllocator = scratchAllocator;
		mABP->setTransientData(updateData.getAABBs(), updateData.getContactDistance());

		// Make sure the shared structures can hold every handle of this update.
		mABP->mShared.checkResize(updateData.getCapacity());

#if PX_CHECKED
		// PT: WARNING: this must be done after the allocateMappingArray call
		// NOTE(review): no allocateMappingArray call is visible here; presumably the checkResize() above plays that role - confirm.
		if(!BroadPhaseUpdateData::isValid(updateData, *this, false, mContextID))
		{
			PX_CHECK_MSG(false, "Illegal BroadPhaseUpdateData \n");
			return;
		}
#endif

		// Cache everything the add / update / remove steps will need.
		mCreatedHandles	= updateData.getCreatedHandles();
		mNbAdded		= updateData.getNumCreatedHandles();

		mUpdatedHandles	= updateData.getUpdatedHandles();
		mNbUpdated		= updateData.getNumUpdatedHandles();

		mRemovedHandles	= updateData.getRemovedHandles();
		mNbRemoved		= updateData.getNumRemovedHandles();

		mGroups = updateData.getGroups();
		mFilter = &updateData.getFilter();
	}

	// PT: run single-threaded if forced to do so
	if(!mEnableMT)
	{
		continuation = NULL;
	}

#ifdef ABP_MT2
	if(continuation)
	{
		// Chain the tasks: task 0 continues into task 1, which continues into the caller's task.
		// Task 1 is hooked up and released first, exactly as in the sequence below.
		mABP->mTask1.mBP = this;
		mABP->mTask1.setContinuation(continuation);

		mABP->mTask0.mBP = this;
		mABP->mTask0.setContinuation(&mABP->mTask1);

		mABP->mTask1.removeReference();
		mABP->mTask0.removeReference();
	}
	else
#endif
	{
		// Inline path.
		{
			PX_PROFILE_ZONE("BroadPhaseABP - setUpdateData", mContextID);

			removeObjects();
			addObjects();
			updateObjects();

			// The result arrays are expected to be empty at this point.
			PX_ASSERT(!mCreated.size());
			PX_ASSERT(!mDeleted.size());

			if(gPrepareOverlapsFlag)
				mABP->Region_prepareOverlaps();
		}

		{
			PX_PROFILE_ZONE("BroadPhaseABP - update", mContextID);
			mABP->findOverlaps(continuation, mGroups, mFilter->getLUT());
		}

		{
			PX_PROFILE_ZONE("BroadPhaseABP - postUpdate", mContextID);
			mABP->finalize(mCreated, mDeleted);
		}
	}
}
#ifdef ABP_MT2
void ABP_InternalTask::run()
{
PX_SIMD_GUARD
internalABP::ABP* abp = mBP->mABP;
if(mID==ABP_TASK_0)
{
{
PX_PROFILE_ZONE("ABP_InternalTask - setUpdateData", mContextID);
mBP->removeObjects();
mBP->addObjects();
mBP->updateObjects();
PX_ASSERT(!mBP->mCreated.size());
PX_ASSERT(!mBP->mDeleted.size());
if(gPrepareOverlapsFlag)
abp->Region_prepareOverlaps();
}
{
PX_PROFILE_ZONE("ABP_InternalTask - update", mContextID);
for(PxU32 k=0;k<9;k++)
{
abp->mCompleteBoxPruningTask0.mTasks[k].mPairs.mDelayedPairs.resetOrClear();
abp->mCompleteBoxPruningTask1.mTasks[k].mPairs.mDelayedPairs.resetOrClear();
}
for(PxU32 k=0;k<NB_BIP_TASKS;k++)
abp->mBipTasks[k].mPairs.mDelayedPairs.resetOrClear();
abp->findOverlaps(getContinuation(), mBP->mGroups, mBP->mFilter->getLUT());
}
}
else if(mID==ABP_TASK_1)
{
//abp->addDelayedPairs();
//abp->finalize(mBP->mCreated, mBP->mDeleted);
abp->finalize(mBP->mCreated, mBP->mDeleted);
abp->addDelayedPairs2(mBP->mCreated);
}
}
#endif
void BroadPhaseABP::removeObjects()
{
PX_PROFILE_ZONE("BroadPhaseABP - removeObjects", mContextID);
PxU32 nbRemoved = mNbRemoved;
const BpHandle* removed = mRemovedHandles;
if(!nbRemoved || !removed)
return;
while(nbRemoved--)
{
const BpHandle index = *removed++;
PX_ASSERT(index+1<mABP->mShared.mABP_Objects_Capacity); // PT: we allocated one more box on purpose
mABP->removeObject(index);
}
}
void BroadPhaseABP::updateObjects()
{
PX_PROFILE_ZONE("BroadPhaseABP - updateObjects", mContextID);
PxU32 nbUpdated = mNbUpdated;
const BpHandle* updated = mUpdatedHandles;
if(!nbUpdated || !updated)
return;
while(nbUpdated--)
{
const BpHandle index = *updated++;
PX_ASSERT(index+1<mABP->mShared.mABP_Objects_Capacity); // PT: we allocated one more box on purpose
mABP->updateObject(index);
}
}
void BroadPhaseABP::addObjects()
{
PX_PROFILE_ZONE("BroadPhaseABP - addObjects", mContextID);
PxU32 nbAdded = mNbAdded;
const BpHandle* created = mCreatedHandles;
if(!nbAdded || !created)
return;
const Bp::FilterGroup::Enum* PX_RESTRICT groups = mGroups;
struct Batch
{
PX_FORCE_INLINE Batch() : mNb(0), mMaxIndex(0) {}
PxU32 mNb;
PxU32 mMaxIndex;
BpHandle mIndices[ABP_BATCHING];
PX_FORCE_INLINE void add(const BpHandle index, internalABP::ABP* PX_RESTRICT abp, FilterType::Enum type)
{
PxU32 nb = mNb;
mMaxIndex = PxMax(mMaxIndex, index);
mIndices[nb++] = index;
if(nb==ABP_BATCHING)
{
mNb = 0;
// PT: TODO: we could use a function ptr here
if(type==FilterType::STATIC)
abp->addStaticObjects(mIndices, ABP_BATCHING, mMaxIndex);
else if(type==FilterType::KINEMATIC)
abp->addKinematicObjects(mIndices, ABP_BATCHING, mMaxIndex);
else
{
PX_ASSERT(type==FilterType::DYNAMIC || type==FilterType::AGGREGATE);
abp->addDynamicObjects(mIndices, ABP_BATCHING, mMaxIndex);
}
mMaxIndex = 0;
}
else
mNb = nb;
}
};
Batch statics;
Batch dynamics;
Batch kinematics;
Batch* batches[FilterType::COUNT] = {NULL};
batches[FilterType::STATIC] = &statics;
batches[FilterType::DYNAMIC] = &dynamics;
batches[FilterType::AGGREGATE] = &dynamics;
batches[FilterType::KINEMATIC] = &kinematics;
while(nbAdded--)
{
const BpHandle index = *created++;
PX_ASSERT(index+1<mABP->mShared.mABP_Objects_Capacity); // PT: we allocated one more box on purpose
FilterType::Enum type = FilterType::Enum(groups[index] & BP_FILTERING_TYPE_MASK);
if(!batches[type])
type = FilterType::DYNAMIC;
batches[type]->add(index, mABP, type);
}
if(statics.mNb)
mABP->addStaticObjects(statics.mIndices, statics.mNb, statics.mMaxIndex);
if(kinematics.mNb)
mABP->addKinematicObjects(kinematics.mIndices, kinematics.mNb, kinematics.mMaxIndex);
if(dynamics.mNb)
mABP->addDynamicObjects(dynamics.mIndices, dynamics.mNb, dynamics.mMaxIndex);
}
// Gives access to the created-pairs array.
// \param nbCreatedPairs	[out] number of entries in the array
// \return the value of mCreated.begin()
const BroadPhasePair* BroadPhaseABP::getCreatedPairs(PxU32& nbCreatedPairs) const
{
	nbCreatedPairs = this->mCreated.size();
	return this->mCreated.begin();
}
// Gives access to the deleted-pairs array.
// \param nbDeletedPairs	[out] number of entries in the array
// \return the value of mDeleted.begin()
const BroadPhasePair* BroadPhaseABP::getDeletedPairs(PxU32& nbDeletedPairs) const
{
	nbDeletedPairs = this->mDeleted.size();
	return this->mDeleted.begin();
}
// Empties a pair buffer.
// A buffer holding no more than DEFAULT_CREATED_DELETED_PAIRS_CAPACITY entries is simply cleared.
// A larger one is reset and then re-reserved at the default capacity.
static void freeBuffer(PxArray<BroadPhasePair>& buffer)
{
	if(buffer.size()<=DEFAULT_CREATED_DELETED_PAIRS_CAPACITY)
	{
		buffer.clear();
		return;
	}

	buffer.reset();
	buffer.reserve(DEFAULT_CREATED_DELETED_PAIRS_CAPACITY);
}
// Frees the ABP-side buffers, then empties the created and deleted pair arrays.
void BroadPhaseABP::freeBuffers()
{
	PX_PROFILE_ZONE("BroadPhaseABP - freeBuffers", mContextID);

	mABP->freeBuffers();

	// Result arrays: created first, deleted second.
	freeBuffer(mCreated);
	freeBuffer(mDeleted);
}
#if PX_CHECKED
// Checks the handle lists of an update against the current object array.
// \param updateData	update whose created / updated / removed handle lists are inspected
// \return false as soon as one handle is inconsistent, true otherwise:
//   - any handle at or beyond the object array capacity is rejected,
//   - a created handle must not refer to an object that is already valid,
//   - an updated or removed handle must refer to an object that is valid.
// The range test is performed explicitly (not only through PX_ASSERT) so that builds where
// assertions are compiled out never index the object array out of bounds. This also covers
// the case where the object array has not been allocated (capacity 0).
bool BroadPhaseABP::isValid(const BroadPhaseUpdateData& updateData) const
{
	const PxU32 nbObjects = mABP->mShared.mABP_Objects_Capacity;
	const ABP_Object* PX_RESTRICT objects = mABP->mShared.mABP_Objects;

	const BpHandle* created = updateData.getCreatedHandles();
	if(created)
	{
		PxU32 nbToGo = updateData.getNumCreatedHandles();
		while(nbToGo--)
		{
			const BpHandle index = *created++;
			PX_ASSERT(index<nbObjects);
			if(index>=nbObjects)
				return false;	// Handle does not fit in the object array
			if(objects[index].isValid())
				return false;	// This object has been added already
		}
	}

	const BpHandle* updated = updateData.getUpdatedHandles();
	if(updated)
	{
		PxU32 nbToGo = updateData.getNumUpdatedHandles();
		while(nbToGo--)
		{
			const BpHandle index = *updated++;
			PX_ASSERT(index<nbObjects);
			if(index>=nbObjects)
				return false;	// Handle does not fit in the object array
			if(!objects[index].isValid())
				return false;	// This object has been removed already, or never been added
		}
	}

	const BpHandle* removed = updateData.getRemovedHandles();
	if(removed)
	{
		PxU32 nbToGo = updateData.getNumRemovedHandles();
		while(nbToGo--)
		{
			const BpHandle index = *removed++;
			PX_ASSERT(index<nbObjects);
			if(index>=nbObjects)
				return false;	// Handle does not fit in the object array
			if(!objects[index].isValid())
				return false;	// This object has been removed already, or never been added
		}
	}
	return true;
}
#endif
void BroadPhaseABP::shiftOrigin(const PxVec3& shift, const PxBounds3* boundsArray, const PxReal* contactDistances)
{
mABP->shiftOrigin(shift, boundsArray, contactDistances);
}