// Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions // are met: // * Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // * Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // * Neither the name of NVIDIA CORPORATION nor the names of its // contributors may be used to endorse or promote products derived // from this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved. // Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. // Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 
#ifndef PXG_CUDA_BROADPHASE_SAP_H
#define PXG_CUDA_BROADPHASE_SAP_H

#include "foundation/PxPinnedArray.h"
#include "BpBroadPhase.h"
#include "PxgCudaBuffer.h"
#include "PxgCudaMemoryAllocator.h"
#include "PxgBroadPhasePairReport.h"
#include "PxgHeapMemAllocator.h"
// PT: for SUPPORT_UPDATE_HANDLES_ARRAY_FOR_GPU
#include "PxgBroadPhaseDesc.h"
#include "foundation/PxPreprocessor.h"
#include "foundation/PxSimpleTypes.h"

namespace physx
{
	// Forward declarations; only pointers/references to these types are needed by this header.
	class KernelWrangler;
	class PxgBpCudaMemoryAllocator;
	class PxgSapPairManager;
	struct PxGpuDynamicsMemoryConfig;
	class PxgAABBManager;
	struct PxgBroadPhaseDesc;
	struct PxgRadixSortDesc;
	class PxgCudaKernelWranglerManager;
	class PxSceneDesc;

	namespace Bp
	{
		class BroadPhaseUpdateData;
	}

	// GPU implementation of the Bp::BroadPhase interface; getType() reports PxBroadPhaseType::eGPU.
	// The class is non-copyable (PX_NOCOPY). Only the small inline accessors are defined here;
	// all other member functions are declared in this header and defined elsewhere.
	//
	// NOTE(review): several template argument lists look like they were lost in this copy of the
	// header (bare "reinterpret_cast(...)", "PxgTypedCudaBuffer", "PxgDevicePointer" and
	// "PxPinnedArray" without "<...>"). Confirm against the canonical file before building.
	class PxgCudaBroadPhaseSap : public Bp::BroadPhase
	{
		PX_NOCOPY(PxgCudaBroadPhaseSap)
	public:
		// Constructor/destructor are defined out of line. The meaning of each argument is not
		// visible from this header; presumably they seed the similarly named members below
		// (mDesc, mGpuKernelWranglerManager, mCudaContextManager, mHeapMemoryManager, mContextID) - TODO confirm.
		PxgCudaBroadPhaseSap(const PxGpuBroadPhaseDesc& desc, PxgCudaKernelWranglerManager* gpuKernelWrangler, PxCudaContextManager* cudaContextManager, const PxGpuDynamicsMemoryConfig& init, PxgHeapMemoryAllocatorManager* heapMemoryManager, PxU64 contextID);
		~PxgCudaBroadPhaseSap();

		// Bp::BroadPhase
		// Identifies this implementation as the GPU broad phase.
		virtual PxBroadPhaseType::Enum getType() const PX_OVERRIDE { return PxBroadPhaseType::eGPU; }
		virtual void release() PX_OVERRIDE;
		virtual void update(PxcScratchAllocator* scratchAllocator, const Bp::BroadPhaseUpdateData& updateData, PxBaseTask* continuation) PX_OVERRIDE;
		virtual void preBroadPhase(const Bp::BroadPhaseUpdateData& updateData) PX_OVERRIDE;
		virtual void fetchBroadPhaseResults() PX_OVERRIDE;

		// Writes the number of entries in mFoundActorPairs to nbCreatedPairs and returns a pointer
		// to the first entry, reinterpreted as the return type.
		// NOTE(review): the cast's target type is missing here; presumably const Bp::BroadPhasePair* - confirm.
		virtual const Bp::BroadPhasePair* getCreatedPairs(PxU32& nbCreatedPairs) const PX_OVERRIDE
		{
			nbCreatedPairs = mFoundActorPairs.size();
			return reinterpret_cast(mFoundActorPairs.begin());
		}

		// Same as getCreatedPairs(), but for mLostActorPairs.
		// NOTE(review): the cast's target type is missing here as well - confirm.
		virtual const Bp::BroadPhasePair* getDeletedPairs(PxU32& nbDeletedPairs) const PX_OVERRIDE
		{
			nbDeletedPairs = mLostActorPairs.size();
			return reinterpret_cast(mLostActorPairs.begin());
		}

		virtual void freeBuffers() PX_OVERRIDE;

		// PT: TODO: shift origin for GPU BP?
		// Currently a no-op: all three arguments are ignored and the body is empty.
		virtual void shiftOrigin(const PxVec3& /*shift*/, const PxBounds3* /*boundsArray*/, const PxReal* /*contactDistances*/) PX_OVERRIDE {}

#if PX_CHECKED
		// Checked builds only. Performs no validation: updateData is ignored and the result is always true.
		virtual bool isValid(const Bp::BroadPhaseUpdateData& updateData) const PX_OVERRIDE { PX_UNUSED(updateData); return true; }
#endif
		//~Bp::BroadPhase

		// Accessors returning references to the bounds and contact-distance buffers
		// (mBoxFpBoundsBuf / mBoxContactDistancesBuf, described in the member table below).
		PX_FORCE_INLINE PxgTypedCudaBuffer& getBoundsBuffer() { return mBoxFpBoundsBuf; }
		PX_FORCE_INLINE PxgTypedCudaBuffer& getContactDistBuffer() { return mBoxContactDistancesBuf; }

		// PX_FORCE_INLINE PxCudaContextManager* getCudaContextManager() { return mCudaContextManager; }

		// Returns the CUDA stream handle stored in mStream.
		PX_FORCE_INLINE CUstream getBpStream() { return mStream; }

		// Returns the typed device pointer of mBPDescBuf.
		PX_FORCE_INLINE PxgDevicePointer getBroadPhaseDescDevicePtr() { return mBPDescBuf.getTypedDevicePtr();}

		// PX_FORCE_INLINE CUdeviceptr getFoundPairsDevicePtr() { return mFoundPairsBuf.getDevicePtr(); }
		// PX_FORCE_INLINE CUdeviceptr getLostPairsDevicePtr() { return mLostPairsBuf.getDevicePtr(); }

		// Returns the manager previously stored with setGPUAABBManager() (no ownership change is visible here).
		PX_FORCE_INLINE PxgAABBManager* getAABBManager() { return mAABBManager; }

		void purgeDuplicateFoundPairs();
		void purgeDuplicateLostPairs();

		// PT: TODO: wait, why is this in the BP?
		// Forwards the array's begin pointer and size to sortBuffer().
		PX_FORCE_INLINE void sortPairs(PxPinnedArray& pairs) { sortBuffer(pairs.begin(), pairs.size()); }

		// Stores the given manager pointer in mAABBManager.
		PX_FORCE_INLINE void setGPUAABBManager(PxgAABBManager* manager) { mAABBManager = manager; }

#if PX_ENABLE_SIM_STATS
		// Only available when PX_ENABLE_SIM_STATS is non-zero; returns mFoundLostPairsStats.
		PX_FORCE_INLINE PxU32 getFoundLostPairsStats() { return mFoundLostPairsStats; }
#else
		PX_CATCH_UNDEFINED_ENABLE_SIM_STATS
#endif

	private:
		// Private helpers, declared here and defined elsewhere. Their behaviour is not visible
		// from this header; the names suggest host<->device transfers, stream/event setup and
		// per-stage kernel launches - TODO confirm against the implementation file.
		void gpuDMAUp(const Bp::BroadPhaseUpdateData& updateData, PxgBroadPhaseDesc& desc, PxgRadixSortDesc* rsDescs);
		void gpuDMABack(const PxgBroadPhaseDesc& desc);

		void createGpuStreamsAndEvents();
		void releaseGpuStreamsAndEvents();

		void translateAABBsKernel();
		void markRemovedPairsKernel();
		void markRemovedPairsProjectionsKernel();
		void markUpdatedPairsKernel();
		void markCreatedPairsKernel();

		// NOTE(review): "WR"/"WOR" presumably mean with/without ranks (see the mRSDescWOR comment below) - confirm.
		void sortProjectionAndHandlesWRKernel(PxU32 previousNumOfBoxes);
		void sortProjectionAndHandlesWORKernel(PxU32 previousNumOfBoxes);

		void initializeSapBoxKernel(const PxU32 numHandles, bool isNew);

		void calculateEndPtHistogramKernel(const bool isIncremental);

		void computeRegionHistogramKernel();

		void computeStartAndActiveHistogramKernel();

		void performIncrementalSapKernel();

		void generateNewPairsKernel();

		void clearNewFlagKernel();

		void updateDescriptor(PxgBroadPhaseDesc& bpDesc);
		void updateRadixSortDesc(PxgRadixSortDesc* rsDesc);

		void runRadixSort(const PxU32 numOfKeys, CUdeviceptr radixSortDescBuf);

		void purgeDuplicates(PxPinnedArray& pairs);

		void runCopyResultsKernel(PxgBroadPhaseDesc& desc);

		// Called by sortPairs() with the array's begin pointer and element count.
		void sortBuffer(PxgBroadPhasePair* reportBuffer, PxU32 size);

		PxU64 mContextID;
		PxGpuBroadPhaseDesc mDesc;

		// PT: from PxgBroadPhaseSap
		PxU32 mNumOfBoxes; //total number of boxes in the scene
		PxU32 mUpdateData_CreatedHandleSize;
		PxU32 mUpdateData_RemovedHandleSize;
#ifdef SUPPORT_UPDATE_HANDLES_ARRAY_FOR_GPU
		PxU32 mUpdateData_UpdatedHandleSize;
#endif
		PxU32 mUpdateData_BoxesCapacity;

		// Raw pointers; no ownership semantics are visible from this header.
		PxgCudaKernelWranglerManager* mGpuKernelWranglerManager;
		PxCudaContextManager* mCudaContextManager;
		PxCudaContext* mCudaContext;
		PxgHeapMemoryAllocatorManager* mHeapMemoryManager;

		PxgTypedCudaBuffer mCreatedHandlesBuf;
		PxgTypedCudaBuffer mRemovedHandlesBuf;
#ifdef SUPPORT_UPDATE_HANDLES_ARRAY_FOR_GPU
		PxgTypedCudaBuffer mUpdatedHandlesBuf;
#endif

		//..................................................................// PT: Description:                          |Comes from:                                |Passed to kernels as:
		//..................................................................//------------------------------------------|-------------------------------------------|------------------------------
		PxgTypedCudaBuffer mBoxFpBoundsBuf;                                 // box bounds in device memory               |BroadPhaseUpdateData::getAABBs()           |updateData_fpBounds
		PxgTypedCudaBuffer mBoxContactDistancesBuf;                         // contact distances in device memory        |BroadPhaseUpdateData::getContactDistance() |updateData_contactDistances
		PxgTypedCudaBuffer mBoxGroupsBuf;                                   // box groups in device memory               |BroadPhaseUpdateData::getGroups            |updateData_groups
		PxgTypedCudaBuffer mBoxEnvIDsBuf;                                   // box env IDs in device memory              |BroadPhaseUpdateData::getEnvIDs            |updateData_envIDs
		PxgTypedCudaBuffer mNewIntegerBoundsBuf;                            // integer bounds in device memory           |translateAABBsLaunch kernel                |newIntegerBounds
		PxgTypedCudaBuffer mOldIntegerBoundsBuf;                            // integer bounds in device memory           |-                                          |oldIntegerBounds
		PxgCudaBufferN<3> mBoxPtProjectionsBuf;                             // integer bounds in device memory           |markCreatedPairsLaunch kernel              |boxProjections

		// NOTE(review): the <3>/<6>/<2> counts below presumably correspond to axes and/or
		// double-buffering (one member is explicitly tagged "double buffer") - confirm.
		PxgCudaBufferN<3> mBoxProjectionRanksBuf;

		PxgCudaBufferN<6> mBoxPtHandlesBuf; //double buffer

		PxgCudaBufferN<3> mTempBoxPtProjectionBuf;
		PxgCudaBufferN<3> mTempBoxPtHandlesBuf;

		PxgCudaBufferN<3> mRadixCountBuf;

		PxgCudaBufferN<3> mBoxSapBox1DBuf; //PxgSapBox1D, the size should be the same as handles
		PxgCudaBufferN<3> mNewBoxSapBox1DBuf; //PxgSapBox1D, the size should be the same as handles

		// NOTE(review): the comment says "start handles" but the member is the *end*-point histogram - confirm which is intended.
		PxgCudaBufferN<6> mEndPtHistogramBuf; //! Histogram for all start handles
		PxgCudaBufferN<6> mBlockEndPtHistogramBuf;
		PxgCudaBufferN<6> mEndPtHandleBuf;

		PxgCudaBufferN<6> mStartPtHistogramBuf;
		PxgCudaBufferN<6> mBlockStartPtHistogramBuf;
		PxgCudaBufferN<6> mStartPtHandleBuf;

		PxgCudaBufferN<6> mTotalEndPtHistogramBuf;
		PxgCudaBufferN<6> mBlockTotalEndPtHistogramBuf;

		PxgTypedCudaBuffer mActiveRegionTotalBuf;
		PxgTypedCudaBuffer mStartRegionsTotalBuf;
		PxgTypedCudaBuffer mOrderedActiveRegionHandlesTotalBuf;
		PxgTypedCudaBuffer mOrderedStartRegionHandlesTotalBuf;

		PxgTypedCudaBuffer mOverlapChecksRegionBuf;
		PxgTypedCudaBuffer mBlockOverlapChecksRegionBuf;
		regionOverlapType mOverlapChecksTotalRegion;
		PxgTypedCudaBuffer mOverlapChecksHandleRegionBuf;

		PxgCudaBufferN<3> mIncrementalComparisons;
		PxgCudaBufferN<3> mIncrementalBlockComparisons;

		PxgCudaBufferN<2> mAggregateReportBlockBuf;
		PxgCudaBufferN<2> mActorReportBlockBuf;

		PxgTypedCudaBuffer mRegionRangeBuf;

		PxgTypedCudaBuffer mStartRegionAccumBuf;
		PxgTypedCudaBuffer mBlockStartRegionAccumBuf;
		PxU32 mStartRegionAccumTotal; //need to write back to cpu every frame

		PxgTypedCudaBuffer mRegionAccumBuf;
		PxgTypedCudaBuffer mBlockRegionAccumBuf;
		PxU32 mRegionAccumTotal; //need to write back to cpu every frame

		PxgTypedCudaBuffer mFoundPairsBuf; //total found pairs(include actors and aggregates)
		PxgTypedCudaBuffer mLostPairsBuf; //total lost pairs(include actors and aggregates)

		PxgTypedCudaBuffer mFoundAggregateBuf;
		PxgTypedCudaBuffer mLostAggregateBuf;

		PxgTypedCudaBuffer mFoundActorBuf;
		PxgTypedCudaBuffer mLostActorBuf;

		// mBPDescBuf is the buffer whose typed device pointer getBroadPhaseDescDevicePtr() returns.
		PxgTypedCudaBuffer mBPDescBuf;
		PxgTypedCudaBuffer mRadixSortDescBuf;
		PxgTypedCudaBuffer mRadixSortWORDescBuf;

		CUstream mStream; // returned by getBpStream()
		CUevent mEvent;
		PxU32* mPinnedEvent;

		PxgBroadPhaseDesc* mBpDesc;
		PxgRadixSortDesc* mRSDesc;
		PxgRadixSortDesc* mRSDescWOR; //wor :: without ranks

		// Pair arrays exposed through getCreatedPairs() / getDeletedPairs().
		PxPinnedArray mFoundActorPairs;
		PxPinnedArray mLostActorPairs;

		//PxArray mHistogramBuffer;
		//PxArray mTempPairBuffer;

		PxU32 mMaxFoundLostPairs;
		PxU32 mMaxAggFoundLostPairs;

		PxgAABBManager* mAABBManager; // set by setGPUAABBManager(), read by getAABBManager()

#if PX_ENABLE_SIM_STATS
		PxU32 mFoundLostPairsStats; // keeps track of max lost found pairs value to tune preallocated buffer size.
#else
		PX_CATCH_UNDEFINED_ENABLE_SIM_STATS
#endif

		bool mForceUpdate;
	};
}

#endif // PXG_CUDA_BROADPHASE_SAP_H