// File: XCEngine/engine/third_party/physx/source/gpusolver/src/PxgContext.cpp
// (2510 lines, 109 KiB, C++)

// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
#include "PxgContext.h"
#include "cudamanager/PxCudaContext.h"
#include "common/PxProfileZone.h"
#include "PxgIslandContext.h"
#include "PxgSolverCore.h"
#include "PxvSimStats.h"
#include "DyConstraintPrep.h"
#include "PxgArticulationCore.h"
#include "PxgSoftBodyCore.h"
#include "PxgFEMClothCore.h"
#include "DyDeformableSurface.h"
#include "DyDeformableVolume.h"
#include "PxgSimulationCore.h"
#include "PxgPBDParticleSystemCore.h"
#include "DyIslandManager.h"
#include "CmFlushPool.h"
// PT: TODO: this doesn't compile anymore these days
//#undef PXG_CONTACT_VALIDATION
//#define PXG_CONTACT_VALIDATION 1
namespace physx
{
#if PXG_CONTACT_VALIDATION
#pragma warning(push)
#pragma warning(disable:4100)
// Debug-only sanity check of narrowphase contact output; only compiled when
// PXG_CONTACT_VALIDATION is enabled.
//
// For every a in [startIndex, endIndex), uniqueIds[a] is mapped through npIds to a
// narrowphase id, the matching PxsContactManagerOutput is fetched from outputIter, and
// its patch and contact arrays are checked with PX_ASSERT.
//
// \param basePatchPointer    start of the patch buffer, used for the offset range check
// \param baseContactPointer  start of the contact buffer, used for the offset range check
// \return always true; every failure is reported through PX_ASSERT only.
static bool validateContactPairs(PxU32 startIndex, PxU32 endIndex, PxU32* uniqueIds, PxU32* npIds, PxsContactManagerOutputIterator& outputIter,
PxU8* basePatchPointer, PxU8* baseContactPointer)
{
for (PxU32 a = startIndex; a < endIndex; ++a)
{
PxU32 uniqueId = uniqueIds[a];
PxU32 npId = npIds[uniqueId];
PxsContactManagerOutput& output = outputIter.getContactManagerOutput(npId);
PxContactPatch* contactPatches = reinterpret_cast<PxContactPatch*>(output.contactPatches);
PxContact* contacts = reinterpret_cast<PxContact*>(output.contactPoints);
// Element offsets from the buffer bases must stay below hard-coded limits.
// NOTE(review): presumably these mirror the buffer capacities - confirm.
PX_ASSERT((contactPatches - reinterpret_cast<PxContactPatch*>(basePatchPointer)) < 655360);
PX_ASSERT((contacts - reinterpret_cast<PxContact*>(baseContactPointer)) < (3145728));
PX_ASSERT(output.nbPatches != 0);
// Sum of per-patch contact counts; must equal output.nbContacts (checked below).
PxU32 totalContact = 0;
for (PxU32 i = 0; i < output.nbPatches; ++i)
{
PxContactPatch& patch = contactPatches[i];
PX_ASSERT(patch.startContactIndex < output.nbContacts);
PX_ASSERT(patch.normal.isNormalized());
totalContact += patch.nbContacts;
}
// Every contact point and separation must be a finite number.
for (PxU32 i = 0; i < output.nbContacts; ++i)
{
PX_ASSERT(contacts[i].contact.isFinite());
PX_ASSERT(PxIsFinite(contacts[i].separation));
}
PX_ASSERT(totalContact == output.nbContacts);
}
return true;
}
// Debug-only sanity check of constraint pre-prep data; only compiled when
// PXG_CONTACT_VALIDATION is enabled.
//
// For every a in [startIndex, endIndex), uniqueIds[a] is mapped through npIds to an index
// into constraintPrePrep. Each of the two node indices of that entry must either be
// PX_INVALID_NODE or be below 16000, and the same bound is asserted on the value stored
// in solverBodyIndices for that node.
// NOTE(review): 16000 is a hard-coded bound whose origin is not visible here - confirm.
//
// \return always true; every failure is reported through PX_ASSERT only.
static bool validateConstraintPairs(PxU32 startIndex, PxU32 endIndex, PxU32* uniqueIds, PxU32* npIds, PxgConstraintPrePrep* constraintPrePrep, PxU32* solverBodyIndices)
{
for (PxU32 a = startIndex; a < endIndex; ++a)
{
PxU32 uniqueId = uniqueIds[a];
PxU32 npId = npIds[uniqueId];
PxgConstraintPrePrep& prePrep = constraintPrePrep[npId];
PX_ASSERT(prePrep.mNodeIndexA.index() == PX_INVALID_NODE || prePrep.mNodeIndexA.index() < 16000);
PX_ASSERT(prePrep.mNodeIndexB.index() == PX_INVALID_NODE || prePrep.mNodeIndexB.index() < 16000);
PX_ASSERT(prePrep.mNodeIndexA.index() == PX_INVALID_NODE || solverBodyIndices[prePrep.mNodeIndexA.index()] < 16000);
PX_ASSERT(prePrep.mNodeIndexB.index() == PX_INVALID_NODE || solverBodyIndices[prePrep.mNodeIndexB.index()] < 16000);
}
return true;
}
#pragma warning(pop)
#endif
class PxgBatchArticulationStaticConstraintPrePrepTask : public Cm::Task
{
PX_NOCOPY(PxgBatchArticulationStaticConstraintPrePrepTask)
private:
PxU32* mStaticContactIndices;
PxU32* mStaticJointIndices;
PxU32* mStaticContactCounts;
PxU32* mStaticJointCounts;
PxU32* mSelfContactIndices;
PxU32* mSelfJointIndices;
PxU32* mSelfContactCounts;
PxU32* mSelfJointCounts;
const PxU32 mStartIndex;
const PxU32 mEndIndex;
PxNodeIndex* mNodeIndices;
PxgBodySimManager& mBodyManager;
const PxU32 mNbArticulations;
public:
static const PxU32 NbPerTask = 512;
PxgBatchArticulationStaticConstraintPrePrepTask(PxU64 context,
PxU32* staticContactIndices, PxU32* staticJointIndices, PxU32* staticContactCounts, PxU32* staticJointCounts,
PxU32* selfContactIndices, PxU32* selfJointIndices, PxU32* selfContactCounts, PxU32* selfJointCounts,
PxU32 startIndex, PxU32 endIndex, PxNodeIndex* nodeIndices, PxgBodySimManager& bodyManager,
PxU32 nbArticulations) :
Cm::Task(context),
mStaticContactIndices(staticContactIndices), mStaticJointIndices(staticJointIndices),
mStaticContactCounts(staticContactCounts), mStaticJointCounts(staticJointCounts),
mSelfContactIndices(selfContactIndices), mSelfJointIndices(selfJointIndices),
mSelfContactCounts(selfContactCounts), mSelfJointCounts(selfJointCounts),
mStartIndex(startIndex), mEndIndex(endIndex),
mNodeIndices(nodeIndices), mBodyManager(bodyManager), mNbArticulations(nbArticulations)
{
}
virtual const char* getName() const PX_OVERRIDE PX_FINAL
{
return "PxgBatchArticulationStaticConstraintPrePrepTask";
}
virtual void runInternal() PX_OVERRIDE PX_FINAL
{
const PxU32 stride = mNbArticulations;
//const PxU32 blockCount = (mNbArticulations + 31)/32;
for (PxU32 i = mStartIndex; i < mEndIndex; ++i)
{
const PxU32 nodeIndex = mNodeIndices[i].index();
PxgStaticConstraints& staticConstraints = mBodyManager.mStaticConstraints[nodeIndex];
const PxU32 staticContactCount = staticConstraints.mStaticContacts.size();
PxgStaticConstraint* uniqueIds = staticConstraints.mStaticContacts.begin();
mStaticContactCounts[i] = staticContactCount;
for (PxU32 a = 0, offset = i; a < staticContactCount; ++a, offset += stride)
{
mStaticContactIndices[offset] = uniqueIds[a].uniqueId;
}
const PxU32 staticJointCount = staticConstraints.mStaticJoints.size();
uniqueIds = staticConstraints.mStaticJoints.begin();
mStaticJointCounts[i] = staticJointCount;
for (PxU32 a = 0, offset = i; a < staticJointCount; ++a, offset += stride)
{
mStaticJointIndices[offset] = uniqueIds[a].uniqueId;
}
const PxU32 articIndex = mBodyManager.mNodeToRemapMap[nodeIndex];
PxgArticulationSelfConstraints& selfConstraints = mBodyManager.mArticulationSelfConstraints[articIndex];
const PxU32 selfContactCount = selfConstraints.mSelfContacts.size();
PxgSelfConstraint* selfIds = selfConstraints.mSelfContacts.begin();
mSelfContactCounts[i] = selfContactCount;
for (PxU32 a = 0, offset = i; a < selfContactCount; ++a, offset += stride)
{
mSelfContactIndices[offset] = selfIds[a].uniqueId;
}
const PxU32 selfJointCount = selfConstraints.mSelfJoints.size();
selfIds = selfConstraints.mSelfJoints.begin();
mSelfJointCounts[i] = selfJointCount;
for (PxU32 a = 0, offset = i; a < selfJointCount; ++a, offset += stride)
{
mSelfJointIndices[offset] = selfIds[a].uniqueId;
}
}
}
};
class PxgBatchRigidStaticConstraintPrePrepTask : public Cm::Task
{
PX_NOCOPY(PxgBatchRigidStaticConstraintPrePrepTask)
private:
PxU32* mStaticContactIndices;
PxU32* mStaticJointIndices;
PxU32* mStaticContactCounts;
PxU32* mStaticJointCounts;
const PxU32 mStartIndex;
const PxU32 mEndIndex;
PxNodeIndex* mNodeIndices;
PxgBodySimManager& mBodyManager;
const PxU32 mNbBodies;
public:
static const PxU32 NbPerTask = 256;
PxgBatchRigidStaticConstraintPrePrepTask(PxU64 context,
PxU32* staticContactIndices, PxU32* staticJointIndices, PxU32* staticContactCounts, PxU32* staticJointCounts,
PxU32 startIndex, PxU32 endIndex, PxNodeIndex* nodeIndices, PxgBodySimManager& bodyManager,
PxU32 nbBodies) :
Cm::Task(context),
mStaticContactIndices(staticContactIndices), mStaticJointIndices(staticJointIndices),
mStaticContactCounts(staticContactCounts), mStaticJointCounts(staticJointCounts),
mStartIndex(startIndex), mEndIndex(endIndex),
mNodeIndices(nodeIndices), mBodyManager(bodyManager), mNbBodies(nbBodies)
{
}
virtual const char* getName() const PX_OVERRIDE PX_FINAL
{
return "PxgBatchRigidStaticConstraintPrePrepTask";
}
virtual void runInternal() PX_OVERRIDE PX_FINAL
{
const PxU32 stride = mNbBodies;
for (PxU32 i = mStartIndex; i < mEndIndex; ++i)
{
const PxU32 nodeIndex = mNodeIndices[i].index();
PxgStaticConstraints& staticConstraints = mBodyManager.mStaticConstraints[nodeIndex];
const PxU32 staticContactCount = staticConstraints.mStaticContacts.size();
PxgStaticConstraint* uniqueIds = staticConstraints.mStaticContacts.begin();
mStaticContactCounts[i] = staticContactCount;
for (PxU32 a = 0, offset = i; a < staticContactCount; ++a, offset += stride)
{
mStaticContactIndices[offset] = uniqueIds[a].uniqueId;
}
const PxU32 staticJointCount = staticConstraints.mStaticJoints.size();
uniqueIds = staticConstraints.mStaticJoints.begin();
mStaticJointCounts[i] = staticJointCount;
for (PxU32 a = 0, offset = i; a < staticJointCount; ++a, offset += stride)
{
mStaticJointIndices[offset] = uniqueIds[a].uniqueId;
}
}
}
};
void PxgCpuConstraintPrePrepTask::runInternal()
{
PX_PROFILE_ZONE("GpuDynamics.PxgCpuJointPrePrepTask", 0);
PxU32 currentEdgeIndex = 0;
for (PxU32 a = 0; a < mNumBatches; ++a)
{
PxU32 descStride = PxMin(mNumEdges - currentEdgeIndex, PXG_BATCH_SIZE);
PxgConstraintBatchHeader& batchHeader = mBatchHeaders[a];
batchHeader.constraintType = PxgSolverConstraintDesc::eCONSTRAINT_1D;
batchHeader.mDescStride = PxU16(descStride);
batchHeader.mConstraintBatchIndex = mConstraintBlockStartIndex + a;
batchHeader.mStartPartitionIndex = mUniqueIdStartIndex + a * PXG_BATCH_SIZE;
batchHeader.mask = 0xFFFFFFFF; //Unused
#if PXG_CONTACT_VALIDATION
validateConstraintPairs(a, a + descStride, mEdgeIds + a, mNpIds, mConstraintPrePrep, mSolverBodyIndices);
#endif
currentEdgeIndex += descStride;
}
for (PxU32 a = 0; a < mNumEdges; ++a)
{
mPinnedEdgeIds[mUniqueIdStartIndex + a] = mEdgeIds[a + mStartEdgeIdx];
}
//PxMemCopy(mPinnedEdgeIds + mUniqueIdStartIndex, mEdgeIds, sizeof(PxU32) * mNumEdges);
}
void PxgCpuArtiConstraintPrePrepTask::runInternal()
{
PX_PROFILE_ZONE("GpuDynamics.PxgCpuArtiJointPrePrepTask", 0);
PxU32 currentEdgeIndex = 0;
for (PxU32 a = 0; a < mNumBatches; ++a)
{
PxgConstraintBatchHeader& batchHeader = mBatchHeaders[a];
PxU32 descStride = PxMin(mNumEdges - currentEdgeIndex, PXG_BATCH_SIZE);
batchHeader.constraintType = PxU16(mIsContact ? PxgSolverConstraintDesc::eARTICULATION_CONTACT : PxgSolverConstraintDesc::eARTICULATION_CONSTRAINT_1D);
batchHeader.mDescStride = PxU16(descStride);
batchHeader.mConstraintBatchIndex = mConstraintBlockStartIndex + a;
batchHeader.mStartPartitionIndex = mUniqueIdStartIndex + a * PXG_BATCH_SIZE;
batchHeader.mask = 0xFFFFFFFF; //Unused
#if PXG_CONTACT_VALIDATION
validateConstraintPairs(a, a + descStride, mEdgeIds + a, mNpIds, mConstraintPrePrep, mSolverBodyIndices);
#endif
currentEdgeIndex += descStride;
}
for (PxU32 a = 0; a < mNumEdges; ++a)
{
mPinnedEdgeIds[mUniqueIdStartIndex + a] = mEdgeIds[a + mStartEdgeIdx];
}
//PxMemCopy(mPinnedEdgeIds + mUniqueIdStartIndex, mEdgeIds, sizeof(PxU32) * mNumEdges);
}
// Task body: forwards to the owning context's doConstraintPrePrepCommon(), passing the
// stored mCont along.
void PxgCpuPrepTask::runInternal()
{
mContext.doConstraintPrePrepCommon(mCont);
}
// Constructs the GPU dynamics context.
//
// - Forwards the island manager, mapped-memory allocator, stats and solver settings to Dy::Context.
// - Builds every pinned pool/array with a PxVirtualAllocator wrapping
//   heapMemoryManager->mMappedMemoryAllocators (heapMemoryManager is dereferenced
//   unconditionally, so it must not be NULL).
// - Binds each internal task object (pre-integration, prep, GPU pre-prep, GPU integration,
//   GPU solve, post-solve) to this context.
// - Leaves all GPU core pointers NULL; they are not created in this constructor.
//
// \param maxNumPartitions        forwarded to the incremental partitioner
// \param maxNumStaticPartitions  stored in mMaxNumStaticPartitions
// \param enableDirectGPUAPI      stored in mEnableDirectGPUAPI
// \param isTGS                   stored in mIsTGS
PxgGpuContext::PxgGpuContext(Cm::FlushPool& flushPool, IG::SimpleIslandManager& islandManager, PxU32 maxNumPartitions, PxU32 maxNumStaticPartitions,
bool enableStabilization, bool useEnhancedDeterminism,
PxReal maxBiasCoefficient, PxvSimStats& simStats, PxgHeapMemoryAllocatorManager* heapMemoryManager, PxReal lengthScale, bool enableDirectGPUAPI, PxU64 contextID, bool isResidualReportingEnabled, bool isTGS) :
Dy::Context(islandManager, heapMemoryManager->mMappedMemoryAllocators, simStats, enableStabilization,
useEnhancedDeterminism, maxBiasCoefficient, lengthScale, contextID, isResidualReportingEnabled),
mTotalEdges(0), mTotalPreviousEdges(0),
mFlushPool(flushPool),
mSolvedThisFrame(false),
mIncrementalPartition(PxVirtualAllocator(heapMemoryManager->mMappedMemoryAllocators), maxNumPartitions, contextID),
mActiveNodeIndex(PxVirtualAllocator(heapMemoryManager->mMappedMemoryAllocators)),
mSolverBodyPool(PxVirtualAllocator(heapMemoryManager->mMappedMemoryAllocators)),
mBody2WorldPool(PxVirtualAllocator(heapMemoryManager->mMappedMemoryAllocators)),
mLinkAndJointAndRootStateDataPool(PxVirtualAllocator(heapMemoryManager->mMappedMemoryAllocators)),
mArticulationSleepDataPool(PxVirtualAllocator(heapMemoryManager->mMappedMemoryAllocators)),
mInternalResidualPerArticulationVelIter(PxVirtualAllocator(heapMemoryManager->mMappedMemoryAllocators)),
mInternalResidualPerArticulationPosIter(PxVirtualAllocator(heapMemoryManager->mMappedMemoryAllocators)),
m1dConstraintBatchIndices(PxVirtualAllocator(heapMemoryManager->mMappedMemoryAllocators)),
mContactConstraintBatchIndices(PxVirtualAllocator(heapMemoryManager->mMappedMemoryAllocators)),
mArti1dConstraintBatchIndices(PxVirtualAllocator(heapMemoryManager->mMappedMemoryAllocators)),
mArtiContactConstraintBatchIndices(PxVirtualAllocator(heapMemoryManager->mMappedMemoryAllocators)),
mConstraintsPerPartition(PxVirtualAllocator(heapMemoryManager->mMappedMemoryAllocators)),
mArtiConstraintsPerPartition(PxVirtualAllocator(heapMemoryManager->mMappedMemoryAllocators)),
mSolverBodyDataPool(PxVirtualAllocator(heapMemoryManager->mMappedMemoryAllocators)),
mSolverBodySleepDataPool(PxVirtualAllocator(heapMemoryManager->mMappedMemoryAllocators)),
mSolverTxIDataPool(PxVirtualAllocator(heapMemoryManager->mMappedMemoryAllocators)),
mCachedPositionIterations(0), mCachedVelocityIterations(0),
mArtiStaticContactCounts(PxVirtualAllocator(heapMemoryManager->mMappedMemoryAllocators)),
mArtiStaticJointCounts(PxVirtualAllocator(heapMemoryManager->mMappedMemoryAllocators)),
mArtiStaticContactIndices(PxVirtualAllocator(heapMemoryManager->mMappedMemoryAllocators)),
mArtiStaticJointIndices(PxVirtualAllocator(heapMemoryManager->mMappedMemoryAllocators)),
mArtiSelfContactCounts(PxVirtualAllocator(heapMemoryManager->mMappedMemoryAllocators)),
mArtiSelfJointCounts(PxVirtualAllocator(heapMemoryManager->mMappedMemoryAllocators)),
mArtiSelfContactIndices(PxVirtualAllocator(heapMemoryManager->mMappedMemoryAllocators)),
mArtiSelfJointIndices(PxVirtualAllocator(heapMemoryManager->mMappedMemoryAllocators)),
mRigidStaticContactCounts(PxVirtualAllocator(heapMemoryManager->mMappedMemoryAllocators)),
mRigidStaticJointCounts(PxVirtualAllocator(heapMemoryManager->mMappedMemoryAllocators)),
mRigidStaticContactIndices(PxVirtualAllocator(heapMemoryManager->mMappedMemoryAllocators)),
mRigidStaticJointIndices(PxVirtualAllocator(heapMemoryManager->mMappedMemoryAllocators)),
mNodeIndicesStagingBuffer(PxVirtualAllocator(heapMemoryManager->mMappedMemoryAllocators)),
mIslandIds(PxVirtualAllocator(heapMemoryManager->mMappedMemoryAllocators)),
mIslandStaticTouchCounts(PxVirtualAllocator(heapMemoryManager->mMappedMemoryAllocators)),
mIsTGS(isTGS),
mIsExternalForcesEveryTgsIterationEnabled(false),
mEnableDirectGPUAPI(enableDirectGPUAPI),
mRecomputeArticulationBlockFormat(false),
mEnforceConstraintWriteBackToHostCopy(false),
mPreIntegrationTask (*this),
mPrepTask (*this),
mGpuPrePrepTask (*this),
mGpuIntegrationTask (*this),
mGpuTask (*this),
mPostSolveTask (*this)
{
// GPU cores start out unset.
// NOTE(review): presumably they are created by a derived class or a later init step - confirm.
mGpuArticulationCore = NULL;
mGpuBp = NULL;
mGpuNpCore = NULL;
mGpuSoftBodyCore = NULL;
mGpuFEMClothCore = NULL;
mGpuSimulationCore = NULL;
mGpuSolverCore = NULL;
mGpuPBDParticleSystemCore = NULL;
mMaxNumStaticPartitions = maxNumStaticPartitions;
}
// Tears down the allocators, threshold streams and the GPU cores deleted here.
//
// The stream allocators are deleted and the solver streams released between
// acquireContext() and releaseContext() on the solver core; the remaining objects are
// deleted afterwards, with the solver core itself last.
// NOTE(review): mGpuSolverCore is dereferenced unconditionally although the constructor
// sets it to NULL - this assumes it has been created before destruction; confirm.
PxgGpuContext::~PxgGpuContext()
{
mGpuSolverCore->acquireContext();
PX_DELETE(mPinnedMemoryAllocator);
PX_DELETE(mContactStreamAllocators[0]);
PX_DELETE(mContactStreamAllocators[1]);
PX_DELETE(mPatchStreamAllocators[0]);
PX_DELETE(mPatchStreamAllocators[1]);
PX_DELETE(mForceStreamAllocator);
PX_DELETE(mFrictionPatchStreamAllocator);
mGpuSolverCore->releaseStreams();
mGpuSolverCore->releaseContext();
PX_DELETE(mThresholdStream);
PX_DELETE(mForceChangedThresholdStream);
PX_DELETE(mGpuArticulationCore);
// Deleted last: the solver core is still used above for the context and stream release.
PX_DELETE(mGpuSolverCore);
}
/**
\brief Returns the stored simulation controller, statically downcast to PxgSimulationController.
*/
PxgSimulationController* PxgGpuContext::getSimulationController()
{
	PxgSimulationController* const gpuController = static_cast<PxgSimulationController*>(mSimulationController);
	return gpuController;
}
// Stores the simulation controller pointer. getSimulationController() later static_casts
// it to PxgSimulationController, so the object passed here must actually be one.
void PxgGpuContext::setSimulationController(PxsSimulationController* simulationController)
{
mSimulationController = simulationController;
}
// Returns the PBD particle system core pointer (NULL until it has been assigned; the
// constructor initializes it to NULL).
PxgParticleSystemCore* PxgGpuContext::getGpuParticleSystemCore()
{
return mGpuPBDParticleSystemCore;
}
/**
\brief Flips the double-buffered contact/patch streams and rewinds all stream pools.

The contact and patch pools are pointed at the start of the newly selected allocator, and the
CPU and GPU shared-data indices of the contact, patch, force and friction patch pools are reset to 0.
*/
void PxgGpuContext::mergeResults()
{
	// Toggle between stream 0 and stream 1.
	mCurrentContactStream = 1 - mCurrentContactStream;

	// Contact stream: rebind to the newly current buffer and rewind.
	mContactStreamPool.mDataStream = mContactStreamAllocators[mCurrentContactStream]->mStart;
	mContactStreamPool.mSharedDataIndex = 0;
	mContactStreamPool.mSharedDataIndexGPU = 0;

	// Patch stream: rebind to the newly current buffer and rewind.
	mPatchStreamPool.mDataStream = mPatchStreamAllocators[mCurrentContactStream]->mStart;
	mPatchStreamPool.mSharedDataIndex = 0;
	mPatchStreamPool.mSharedDataIndexGPU = 0;

	// Force and friction patch streams are not rebound here, only rewound.
	mForceStreamPool.mSharedDataIndex = 0;
	mForceStreamPool.mSharedDataIndexGPU = 0;

	mFrictionPatchStreamPool.mSharedDataIndex = 0;
	mFrictionPatchStreamPool.mSharedDataIndexGPU = 0;
}
/**
\brief Retrieves the base addresses of the contact, patch and force/index streams.

All three output references are filled by the GPU solver core; this function only forwards.
*/
void PxgGpuContext::getDataStreamBase(void*& contactStreamBase, void*& patchStreamBase, void*& forceAndIndiceStreamBase)
{
	mGpuSolverCore->getDataStreamBase(contactStreamBase, patchStreamBase, forceAndIndiceStreamBase);
}
// Pre-prepare step for block-format joints that are built from the non-block-format joints.
//
// 1. If the joint manager holds CPU-side rigid constraints, their data and rows are uploaded
//    through the solver core; the same is done separately for CPU-side articulation constraints.
// 2. The parallel joint block pre-prep is then launched over the sum of all joint batch
//    counts (rigid, rigid static, articulation, articulation static, articulation self).
void PxgGpuContext::doConstraintJointBlockPrePrepGPU()
{
// DMA the joint pre-prepare data that is constructed on the CPU (not D6Joint) to the GPU.
// AD: This is not needed for direct-GPU API but downstream things are getting really complex and I cannot
// figure out which count I need to adjust to avoid crashing.
//if (!mEnableDirectGPUAPI)
{
PxgJointManager& jointManager = getSimulationController()->getJointManager();
if (jointManager.getCpuNbRigidConstraints() > 0)
{
mGpuSolverCore->gpuMemDMAUpJointData(jointManager.getCpuRigidConstraintData(), jointManager.getCpuRigidConstraintRows(), jointManager.getCpuRigidConstraintData().size(), jointManager.getGpuNbRigidConstraints(),
PxU32(jointManager.mNbCpuRigidConstraintRows));
}
if (jointManager.getCpuNbArtiConstraints() > 0)
{
mGpuSolverCore->gpuMemDMAUpArtiJointData(jointManager.getCpuArtiConstraintData(), jointManager.getCpuArtiConstraintRows(), jointManager.getCpuArtiConstraintData().size(), jointManager.getGpuNbArtiConstraints(),
PxU32(jointManager.mNbCpuArtiConstraintRows));
}
}
// maybe this is also not needed if we have direct-GPU?
mGpuSolverCore->jointConstraintBlockPrePrepParallel(mNumConstraintBatches + mNumRigidStaticConstraintBatches + mNumArticConstraintBatches + mNumArtiStaticConstraintBatches + mNumArtiSelfConstraintBatches);
}
/**
\brief Sizes the articulation static/self constraint tables and spawns the CPU tasks that fill them.

The count arrays get one entry per articulation; the index arrays get (max constraints of that
kind) * (articulation count) entries. Work is split into tasks of
PxgBatchArticulationStaticConstraintPrePrepTask::NbPerTask articulations each, all chained to
\p continuation.

\param continuation                     task the spawned worker tasks continue into
\param articulationConstraintBatchIndex stored as the articulation static constraint batch offset
\param articulationContactBatchIndex    stored as the articulation static contact batch offset
*/
void PxgGpuContext::doStaticArticulationConstraintPrePrep(physx::PxBaseTask* continuation, const PxU32 articulationConstraintBatchIndex, const PxU32 articulationContactBatchIndex)
{
	PxgBodySimManager& bodySimManager = getSimulationController()->getBodySimManager();

	// Articulation node indices follow the rigid bodies of the first island context.
	PxgIslandContext& firstIsland = mIslandContextPool[0];
	const PxU32 firstArticulation = firstIsland.mBodyStartIndex + firstIsland.mBodyCount;
	PxNodeIndex* articulationNodes = mActiveNodeIndex.begin() + firstArticulation;

	//KS - TODO - revisit this and make it work with batching. Currently, it is disabled!
	mArtiStaticConstraintBatchOffset = articulationConstraintBatchIndex;
	mArtiStaticContactBatchOffset = articulationContactBatchIndex;

	PX_PROFILE_ZONE("Articulation Static constraint", 0);

	// One counter per articulation and constraint kind.
	mArtiStaticContactCounts.resize(mArticulationCount);
	mArtiStaticJointCounts.resize(mArticulationCount);
	mArtiSelfContactCounts.resize(mArticulationCount);
	mArtiSelfJointCounts.resize(mArticulationCount);

	// Index tables hold the worst-case constraint count for every articulation.
	const PxU32 staticContactCapacity = bodySimManager.mMaxStaticArticContacts;
	const PxU32 staticJointCapacity = bodySimManager.mMaxStaticArticJoints;
	const PxU32 selfContactCapacity = bodySimManager.mMaxSelfArticContacts;
	const PxU32 selfJointCapacity = bodySimManager.mMaxSelfArticJoints;
	mArtiStaticContactIndices.resize(staticContactCapacity * mArticulationCount);
	mArtiStaticJointIndices.resize(staticJointCapacity * mArticulationCount);
	mArtiSelfContactIndices.resize(selfContactCapacity * mArticulationCount);
	mArtiSelfJointIndices.resize(selfJointCapacity * mArticulationCount);

	const PxU32 nbPerTask = PxgBatchArticulationStaticConstraintPrePrepTask::NbPerTask;
	for (PxU32 first = 0; first < mArticulationCount; first += nbPerTask)
	{
		const PxU32 last = PxMin(first + nbPerTask, mArticulationCount);

		void* taskMemory = mFlushPool.allocate(sizeof(PxgBatchArticulationStaticConstraintPrePrepTask));
		PxgBatchArticulationStaticConstraintPrePrepTask* worker = PX_PLACEMENT_NEW(taskMemory, PxgBatchArticulationStaticConstraintPrePrepTask)
			(0, mArtiStaticContactIndices.begin(), mArtiStaticJointIndices.begin(), mArtiStaticContactCounts.begin(), mArtiStaticJointCounts.begin(),
			mArtiSelfContactIndices.begin(), mArtiSelfJointIndices.begin(), mArtiSelfContactCounts.begin(), mArtiSelfJointCounts.begin(),
			first, last, articulationNodes, bodySimManager, mArticulationCount);

		worker->setContinuation(continuation);
		worker->removeReference();
	}
}
/**
\brief Sizes the rigid body static constraint tables and spawns the CPU tasks that fill them.

The count arrays get one entry per body; the index arrays get (max static constraints of that
kind) * (body count) entries. Work is split into tasks of
PxgBatchRigidStaticConstraintPrePrepTask::NbPerTask bodies each, all chained to \p continuation.

\param continuation task the spawned worker tasks continue into
*/
void PxgGpuContext::doStaticRigidConstraintPrePrep(physx::PxBaseTask* continuation)
{
	PX_PROFILE_ZONE("Rigid Static constraint", 0);

	PxgBodySimManager& bodyManager = getSimulationController()->getBodySimManager();

	// Rigid body node indices start at the first island context's body start index.
	PxgIslandContext& island = mIslandContextPool[0];
	const PxU32 bodyStartIndex = island.mBodyStartIndex;
	PxNodeIndex* nodeIndices = mActiveNodeIndex.begin() + bodyStartIndex;

	// One counter per body and constraint kind.
	mRigidStaticContactCounts.resize(mBodyCount);
	mRigidStaticJointCounts.resize(mBodyCount);

	// Index tables hold the worst-case constraint count for every body.
	const PxU32 maxRigidStaticContacts = bodyManager.mMaxStaticRBContacts;
	const PxU32 maxRigidStaticJoints = bodyManager.mMaxStaticRBJoints;
	mRigidStaticContactIndices.resize(maxRigidStaticContacts * mBodyCount);
	mRigidStaticJointIndices.resize(maxRigidStaticJoints * mBodyCount);

	// Batch with the rigid task's own NbPerTask. The articulation task's constant was used
	// here before, which left the rigid constant unused and made the batch size of this
	// loop depend on an unrelated class.
	const PxU32 nbPerTask = PxgBatchRigidStaticConstraintPrePrepTask::NbPerTask;
	for (PxU32 i = 0; i < mBodyCount; i += nbPerTask)
	{
		const PxU32 endIndex = PxMin(i + nbPerTask, mBodyCount);

		PxgBatchRigidStaticConstraintPrePrepTask* task = PX_PLACEMENT_NEW(mFlushPool.allocate(sizeof(PxgBatchRigidStaticConstraintPrePrepTask)), PxgBatchRigidStaticConstraintPrePrepTask)
			(0, mRigidStaticContactIndices.begin(), mRigidStaticJointIndices.begin(), mRigidStaticContactCounts.begin(), mRigidStaticJointCounts.begin(),
			i, endIndex, nodeIndices, bodyManager, mBodyCount);

		task->setContinuation(continuation);
		task->removeReference();
	}
}
// Runs the GPU constraint solve and the follow-up work for the current step.
//
// \param maxNodes          used to size the force-threshold accumulation (maxNodes + 1)
// \param changedHandleMap  forwarded to the simulation controller's update()
void PxgGpuContext::doConstraintSolveGPU(PxU32 maxNodes, PxBitMapPinned& changedHandleMap)
{
/**
* Things to do in here:
* (1) Solve on GPU
* (2) Write-back on GPU
* (3) Integration on GPU (transforms are now on GPU solver body data so might as well use them)
*/
mGpuArticulationCore->syncStream();
// One position-iteration residual entry per constraint write-back entry.
mConstraintPositionIterResidualPoolGpu.resize(mConstraintWriteBackPool.size());
mGpuSolverCore->solveContactMultiBlockParallel(mIslandContextPool, mNumIslandContextPool,
mIncrementalPartition.getCombinedSlabMaxNbPartitions(), mConstraintsPerPartition, mArtiConstraintsPerPartition, mGravity,
mConstraintPositionIterResidualPoolGpu.begin(), mConstraintPositionIterResidualPoolGpu.size(), &mTotalContactError.mPositionIterationErrorAccumulator,
mArticulationContactErrorPosIter, mInternalResidualPerArticulationPosIter);
mContactErrorPosIter = &mTotalContactError.mPositionIterationErrorAccumulator;
if (mHasForceThresholds)
mGpuSolverCore->accumulatedForceThresholdStream(maxNodes + 1);
// Integration starts after entry 0 and the kinematic bodies.
const PxU32 offset = 1 + mKinematicCount;
//KS - todo - use separate streams. In addition, read number of threshold streams before DMAing back data
mGpuSolverCore->gpuMemDMAbackSolverData(mForceStreamPool.mDataStream,
mForceStreamPool.mDataStreamSize - mForceStreamPool.mSharedDataIndex,
(PxU32)mForceStreamPool.mSharedDataIndex, (PxU32)mForceStreamPool.mSharedDataIndexGPU,
mForceChangedThresholdStream->begin(), mIncrementalPartition.hasForceThresholds(),
mConstraintWriteBackPool.begin(), mConstraintWriteBackPool.size(),
(!mEnableDirectGPUAPI || getSimulationController()->getEnableOVDCollisionReadback()), mContactErrorVelIter);
mGpuSolverCore->integrateCoreParallel(offset, mSolverBodyPool.size());
mGpuArticulationCore->updateBodies(mDt, !mIsTGS, mEnableDirectGPUAPI);
mSimulationController->update(changedHandleMap);
if (isResidualReportingEnabled())
mArticulationContactErrorVelIter.resize(1);
// Articulation state is only copied back when direct-GPU API is off or OVD readback is on.
if (!mEnableDirectGPUAPI || getSimulationController()->getEnableOVDReadback())
{
mGpuArticulationCore->gpuMemDMAbackArticulation(mLinkAndJointAndRootStateDataPool, mArticulationSleepDataPool,
mInternalResidualPerArticulationVelIter, mArticulationContactErrorVelIter);
}
// The last argument is true exactly when direct-GPU API is on and OVD readback is off.
mGpuSolverCore->gpuMemDMAbackSolverBodies(reinterpret_cast<float4*>(mSolverBodyPool.begin()), mSolverBodyPool.size(), mBody2WorldPool,
mSolverBodySleepDataPool, mEnableDirectGPUAPI && (!getSimulationController()->getEnableOVDReadback()));
}
/**
\brief CPU task that copies solver results for a range of rigid bodies back into their PxsBodyCore.

For each of the nbBodies entries the task writes the new pose, linear/angular velocity,
wake counter and internal flags, after saving the previous pose in mLastTransform.
Linear velocities are read from bodyVelocities[i], angular velocities from
bodyVelocities[i + totalBodies].
*/
class PxgPostSolveWorkerTask : public Cm::Task
{
	PxNodeIndex* mNodeIndices;
	PxAlignedTransform* mBodyToWorldPool;
	PxgSolverBodySleepData* mSolverBodySleepDataPool;
	float4* mBodyVelocities;
	PxU32 mNbBodies;
	PxU32 mTotalBodies;
	IG::IslandSim* mIslandSim;

public:
	PxgPostSolveWorkerTask(PxNodeIndex* nodeIndices, PxAlignedTransform* bodyToWorldPool, PxgSolverBodySleepData* solverBodySleepDataPool, float4* bodyVelocities, PxU32 nbBodies, PxU32 totalBodies,
		IG::IslandSim* islandSim)
		: Cm::Task(0)
		, mNodeIndices(nodeIndices)
		, mBodyToWorldPool(bodyToWorldPool)
		, mSolverBodySleepDataPool(solverBodySleepDataPool)
		, mBodyVelocities(bodyVelocities)
		, mNbBodies(nbBodies)
		, mTotalBodies(totalBodies)
		, mIslandSim(islandSim)
	{
	}

	virtual void runInternal() PX_OVERRIDE PX_FINAL
	{
		PX_PROFILE_ZONE("GpuDynamics.PxgPostSolveWorkerTask", 0);

		// AD: this work should be skipped after GPU errors because it would hit the asserts
		// below; that skip is not done in this task (original note: it happens outside).
		for (PxU32 bodyIdx = 0; bodyIdx < mNbBodies; ++bodyIdx)
		{
			// Resolve the rigid body through the island sim, using only the node's index part.
			const PxU32 node = mNodeIndices[bodyIdx].index();
			PxsRigidBody& rigidBody = *getRigidBodyFromIG(*mIslandSim, PxNodeIndex(node));
			PxsBodyCore& core = rigidBody.getCore();

			// Pose: remember the previous transform before overwriting it.
			rigidBody.mLastTransform = core.body2World;
			core.body2World = mBodyToWorldPool[bodyIdx].getTransform();

			// Velocities: only the xyz components of each float4 are used.
			const float4& linear = mBodyVelocities[bodyIdx];
			const float4& angular = mBodyVelocities[bodyIdx + mTotalBodies];
			core.linearVelocity = PxVec3(linear.x, linear.y, linear.z);
			core.angularVelocity = PxVec3(angular.x, angular.y, angular.z);

			// Sleep-check data.
			const PxgSolverBodySleepData& sleep = mSolverBodySleepDataPool[bodyIdx];
			core.solverWakeCounter = sleep.wakeCounter;
			rigidBody.mInternalFlags = PxU8(sleep.internalFlags);

			PX_ASSERT(core.linearVelocity.isFinite());
			PX_ASSERT(core.angularVelocity.isFinite());
		}
	}

	virtual const char* getName() const PX_OVERRIDE PX_FINAL
	{
		return "PxgPostSolveWorkerTask";
	}

private:
	PX_NOCOPY(PxgPostSolveWorkerTask)
};
class PxgPostSolveArticulationTask : public Cm::Task
{
PxNodeIndex* mNodeIndices;
//see PxgArticulationLinkJointRootStateData
PxU8* mLinkAndJointAndRootStates;
Dy::ErrorAccumulator* mInternalResidualPerArticulationVelIter;
Dy::ErrorAccumulator* mInternalResidualPerArticulationPosIter;
PxgSolverBodySleepData* mSleepData;
PxU32 mNbArticulations;
PxU32 mArticulationStartIndex; //articulation offset in the nodeIndex
PxU32 mBatchStartIndex;
IG::SimpleIslandManager* mIslandManager;
PxU32 mMaxLinks;
PxU32 mMaxDofs;
PxReal mDt;
PxU32 mArticulationCount;
public:
PxgPostSolveArticulationTask(PxNodeIndex* nodeIndices, PxU8* linkAndJointAndRootStates, Dy::ErrorAccumulator* internalResidualPerArticulationPosIter,
Dy::ErrorAccumulator* internalResidualPerArticulationVelIter, PxgSolverBodySleepData* sleepData, PxU32 nbArticulation,
PxU32 articulationStartIndex,
IG::SimpleIslandManager* islandManager, const PxU32 batchStartIndex, const PxU32 maxLinks, const PxU32 maxDofs,
const PxReal dt, const PxU32 totalArticulationCount) :
Cm::Task(0), mNodeIndices(nodeIndices),
mLinkAndJointAndRootStates(linkAndJointAndRootStates),
mInternalResidualPerArticulationVelIter(internalResidualPerArticulationVelIter),
mInternalResidualPerArticulationPosIter(internalResidualPerArticulationPosIter),
mSleepData(sleepData),
mNbArticulations(nbArticulation),
mArticulationStartIndex(articulationStartIndex),
mBatchStartIndex(batchStartIndex),
mIslandManager(islandManager),
mMaxLinks(maxLinks), mMaxDofs(maxDofs),
mDt(dt), mArticulationCount(totalArticulationCount)
{
}
// Writes the solver results for this task's batch of articulations back into the CPU-side
// articulation objects: residual accumulators, time step, wake counter, island activation state,
// per-link pose/velocity/acceleration/incoming joint force, per-dof joint state and the root
// pre-motion velocity.
virtual void runInternal() PX_OVERRIDE PX_FINAL
{
	PX_PROFILE_ZONE("GpuDynamics.PxgPostSolveArticulationTask", 0);

	IG::IslandSim& accurateSim = mIslandManager->getAccurateIslandSim();
	const PxU32 batchEnd = mBatchStartIndex + mNbArticulations;

	for (PxU32 artiIdx = mBatchStartIndex; artiIdx < batchEnd; ++artiIdx)
	{
		// The node index list is addressed with an extra offset of mArticulationStartIndex.
		PxNodeIndex nodeIndex = mNodeIndices[artiIdx + mArticulationStartIndex];

		Dy::FeatherstoneArticulation& articulation = *getArticulationFromIG(accurateSim, nodeIndex);
		Dy::ArticulationData& artiData = articulation.getArticulationData();

		// Residuals: entry [artiIdx] feeds the internal accumulators, entry [artiIdx + mArticulationCount]
		// feeds the contact accumulators.
		articulation.mInternalErrorAccumulatorPosIter = mInternalResidualPerArticulationPosIter[artiIdx];
		articulation.mInternalErrorAccumulatorVelIter = mInternalResidualPerArticulationVelIter[artiIdx];
		articulation.mContactErrorAccumulatorPosIter = mInternalResidualPerArticulationPosIter[artiIdx + mArticulationCount];
		articulation.mContactErrorAccumulatorVelIter = mInternalResidualPerArticulationVelIter[artiIdx + mArticulationCount];

		artiData.setDt(mDt);

		const PxU32 nbLinks = artiData.getLinkCount();
		const PxU32 nbDofs = artiData.getDofs();

		// Locate this articulation's state block and split it into its sub-arrays.
		PxU8* stateBuffer = PxgArticulationLinkJointRootStateData::getArticulationStateDataBuffer(
			mLinkAndJointAndRootStates, mMaxLinks, mMaxDofs, artiIdx);

		PxTransform* srcBody2Worlds = NULL;
		Cm::UnAlignedSpatialVector* srcLinkVelocities = NULL;
		Cm::UnAlignedSpatialVector* srcLinkAccelerations = NULL;
		Cm::UnAlignedSpatialVector* srcLinkIncomingJointForces = NULL;
		PxReal* srcJointPositions = NULL;
		PxReal* srcJointVelocities = NULL;
		PxReal* srcJointAccels = NULL;
		Cm::UnAlignedSpatialVector* srcRootPreVel = NULL;
		PxgArticulationLinkJointRootStateData::decomposeArticulationStateDataBuffer(
			stateBuffer,
			nbLinks, nbDofs,
			srcBody2Worlds, srcLinkVelocities, srcLinkAccelerations, srcLinkIncomingJointForces,
			srcJointPositions, srcJointVelocities, srcJointAccels,
			srcRootPreVel);

		articulation.getCore()->wakeCounter = mSleepData[artiIdx].wakeCounter;

		// Mirror the activation change requested by the solver into both island sims.
		if (mSleepData[artiIdx].internalFlags & PxsRigidBody::eACTIVATE_THIS_FRAME)
		{
			accurateSim.activateNode_ForGPUSolver(nodeIndex);
			mIslandManager->getSpeculativeIslandSim().activateNode_ForGPUSolver(nodeIndex);
		}
		else if (mSleepData[artiIdx].internalFlags & PxsRigidBody::eDEACTIVATE_THIS_FRAME)
		{
			accurateSim.deactivateNode_ForGPUSolver(nodeIndex);
			mIslandManager->getSpeculativeIslandSim().deactivateNode_ForGPUSolver(nodeIndex);
		}

		// Per-link state.
		Dy::ArticulationLink* dstLinks = artiData.getLinks();
		Cm::SpatialVectorF* dstVelocities = artiData.getMotionVelocities();
		Cm::SpatialVectorF* dstAccelerations = artiData.getMotionAccelerations();
		Cm::SpatialVectorF* dstIncomingJointForces = artiData.getLinkIncomingJointForces();
		for (PxU32 linkIdx = 0; linkIdx < nbLinks; ++linkIdx)
		{
			PX_ASSERT(srcBody2Worlds[linkIdx].isValid());

			const Cm::UnAlignedSpatialVector& srcVel = srcLinkVelocities[linkIdx];
			const Cm::UnAlignedSpatialVector& srcAccel = srcLinkAccelerations[linkIdx];
			const Cm::UnAlignedSpatialVector& srcForce = srcLinkIncomingJointForces[linkIdx];

			Dy::ArticulationLink& dstLink = dstLinks[linkIdx];
			dstLink.bodyCore->body2World = srcBody2Worlds[linkIdx];
			dstLink.bodyCore->angularVelocity = srcVel.top;
			dstLink.bodyCore->linearVelocity = srcVel.bottom;

			dstVelocities[linkIdx].top = srcVel.top;
			dstVelocities[linkIdx].bottom = srcVel.bottom;

			dstAccelerations[linkIdx].top = srcAccel.top;
			dstAccelerations[linkIdx].bottom = srcAccel.bottom;

			dstIncomingJointForces[linkIdx].top = srcForce.top;
			dstIncomingJointForces[linkIdx].bottom = srcForce.bottom;
		}

		// The entry for link 0 is always reset to zero after the copy.
		dstIncomingJointForces[0].top = PxVec3(PxZero);
		dstIncomingJointForces[0].bottom = PxVec3(PxZero);

		// Per-dof joint state.
		PxReal* dstJointPositions = artiData.getJointPositions();
		PxReal* dstJointVelocities = artiData.getJointVelocities();
		PxReal* dstJointAccelerations = artiData.getJointAccelerations();
		for (PxU32 dof = 0; dof < nbDofs; ++dof)
		{
			dstJointPositions[dof] = srcJointPositions[dof];
			dstJointVelocities[dof] = srcJointVelocities[dof];
			dstJointAccelerations[dof] = srcJointAccels[dof];
		}

		artiData.setRootPreMotionVelocity(*srcRootPreVel);
	}
}
// Returns the constant name string of this task.
virtual const char* getName() const PX_OVERRIDE PX_FINAL
{
return "PxgPostSolveArticulationTask";
}
private:
PX_NOCOPY(PxgPostSolveArticulationTask)
};
// Hands the lost/found contact patch managers to the incremental partitioner, together with the
// accurate island sim and the simulation controller's body-sim and joint managers.
void PxgGpuContext::processPatches(Cm::FlushPool& flushPool, PxBaseTask* continuation,
	PxsContactManager** lostFoundPatchManagers, PxU32 nbLostFoundPatchManagers, PxsContactManagerOutputCounts* outCounts)
{
	IG::IslandSim& accurateIslandSim = mIslandManager.getAccurateIslandSim();
	PxgBodySimManager& bodySimManager = getSimulationController()->getBodySimManager();
	PxgJointManager& jointManager = getSimulationController()->getJointManager();

	mIncrementalPartition.processLostFoundPatches(flushPool, continuation, accurateIslandSim,
		bodySimManager, jointManager,
		lostFoundPatchManagers, nbLostFoundPatchManagers, outCounts);
}
// Post-solve step: integrates the particle systems, syncs the solver results back from the device and,
// when CPU read-back is required, spawns worker tasks that write rigid body and articulation results
// into the CPU-side data. Finally queries the particle finalize streams.
void PxgGpuContext::doPostSolveTask(physx::PxBaseTask* continuation)
{
	// Nothing to do if no solve ran this frame or the CUDA context is in abort mode.
	// AD: sneaky, but apparently only narrowphasecore has that member public.
	if (!mSolvedThisFrame || getNarrowphaseCore()->mCudaContext->isInAbortMode())
		return;

	const PxU32 nbParticleCores = mGpuParticleSystemCores.size();
	for (PxU32 c = 0; c < nbParticleCores; ++c)
	{
		PxgParticleSystemCore* particleCore = mGpuParticleSystemCores[c];
		const PxReal eps = 0.f;// mLengthScale * 1e-4f;
		particleCore->integrateSystems(mDt, eps*eps);
		particleCore->onPostSolve(); // call the callback.
	}

	// Sync results back and resize the force-threshold stream to the element count reported by the sync.
	PxU32 nbThresholdElems = 0;
	mGpuSolverCore->syncDmaBack(nbThresholdElems);
	mForceChangedThresholdStream->forceSize_Unsafe(nbThresholdElems);

	const bool needsCpuWriteBack = !mEnableDirectGPUAPI || getSimulationController()->getEnableOVDReadback();
	if (needsCpuWriteBack)
	{
		//TODO - multi-thread this!
		// Rigid bodies: the first (1 + mKinematicCount) solver bodies are skipped.
		const PxU32 firstBody = 1 + mKinematicCount;
		const PxU32 nbSolverBodies = mSolverBodyPool.size();
		const PxU32 bodiesPerTask = 512;

		float4* velocities = reinterpret_cast<float4*>(mSolverBodyPool.begin());
		PxAlignedTransform* poses = mBody2WorldPool.begin();
		PxNodeIndex* activeNodes = mActiveNodeIndex.begin();
		IG::IslandSim* accurateIslandSim = &mIslandManager.getAccurateIslandSim();

		//write back the data to PxsBodyCore
		for (PxU32 first = firstBody; first < nbSolverBodies; first += bodiesPerTask)
		{
			PxgSolverBodySleepData* sleepData = &mSolverBodySleepDataPool[first];
			void* taskMem = mFlushPool.allocate(sizeof(PxgPostSolveWorkerTask));
			PxgPostSolveWorkerTask* bodyTask = PX_PLACEMENT_NEW(taskMem, PxgPostSolveWorkerTask)(
				activeNodes + first, poses + first, sleepData, velocities + first,
				PxMin(bodiesPerTask, nbSolverBodies - first), nbSolverBodies, accurateIslandSim);
			bodyTask->setContinuation(continuation);
			bodyTask->removeReference();
		}

		// Articulations: at least 64 per task, more when there are many articulations.
		const PxU32 maxLinks = getSimulationController()->getSimulationCore()->getMaxArticulationLinks();
		const PxU32 maxDofs = getSimulationController()->getSimulationCore()->getMaxArticulationDofs();
		const PxU32 articulationsPerTask = PxMax(64u, (mArticulationCount + 127u) / 128u);

		for (PxU32 first = 0; first < mArticulationCount; first += articulationsPerTask)
		{
			// NOTE(review): the residual buffers are passed pos-iter first, then vel-iter; the constructor's
			// parameter order is declared outside this function - confirm it matches.
			void* taskMem = mFlushPool.allocate(sizeof(PxgPostSolveArticulationTask));
			PxgPostSolveArticulationTask* artiTask = PX_PLACEMENT_NEW(taskMem, PxgPostSolveArticulationTask)(
				activeNodes,
				mLinkAndJointAndRootStateDataPool.begin(),
				mInternalResidualPerArticulationPosIter.begin(), mInternalResidualPerArticulationVelIter.begin(),
				mArticulationSleepDataPool.begin(),
				PxMin(articulationsPerTask, mArticulationCount - first),
				mArticulationStartIndex, &mIslandManager, first,
				maxLinks, maxDofs, mDt, mArticulationCount);
			artiTask->setContinuation(continuation);
			artiTask->removeReference();
		}
	}

	mGpuSolverCore->acquireContext();
	for (PxU32 c = 0; c < nbParticleCores; ++c)
	{
		cuStreamQuery(mGpuParticleSystemCores[c]->getFinalizeStream()); //Flush particle work
	}
	mGpuSolverCore->releaseContext();
}
// Fills the solver body data and the transform/inertia data of a static or kinematic body from its
// PxsBodyCore. The body is flagged as kinematic, its sqrt inverse inertia is zeroed and its delta
// transform is set to identity.
static void copyToSolverBodyStaticAndKinematic(PxgSolverBodyData& data, PxgSolverTxIData& txIData, const PxsBodyCore& core, PxNodeIndex nodeIndex)
{
	PX_ASSERT(core.linearVelocity.isFinite());
	PX_ASSERT(core.angularVelocity.isFinite());

	// PT: the disable-gravity sleep flag is not needed for statics/kinematics.
	// The inverse inertia is not written here: that data has been moved to pxgbodysim.
	// The node index is stored as-is (no CCD / surface velocity bits are packed into it).
	data.islandNodeIndex = nodeIndex;
	data.flags = PxRigidBodyFlag::eKINEMATIC;

	// Velocities
	data.initialLinVel = core.linearVelocity;
	data.initialAngVel = core.angularVelocity;

	// Scalar properties
	data.invMass = core.inverseMass;
	data.penBiasClamp = core.maxPenBias;
	data.reportThreshold = core.contactReportThreshold;
	data.maxImpulse = core.maxContactImpulse;
	data.offsetSlop = 0.0f;

	// Pose
	const PxTransform& pose = core.body2World;
	data.body2World = PxAlignedTransform(pose.p.x, pose.p.y, pose.p.z,
		PxAlignedQuat(pose.q.x, pose.q.y, pose.q.z, pose.q.w));

	// Transform / inertia data
	txIData.sqrtInvInertia = PxMat33(PxZero);
	txIData.deltaBody2World = PxTransform(PxIdentity);
}
static void atomArticulationIntegration(const PxU32 numArticulations,
const PxNodeIndex* const PX_RESTRICT islandNodes,
IG::SimpleIslandManager& islandManager,
PxI32* maxPosIters, PxI32* maxVelIters)
{
PxU32 localMaxPosIter = 0, localMaxVelIter = 0;
for (PxU32 a = 0; a < numArticulations; ++a)
{
const PxNodeIndex nodeId = islandNodes[a];
//const PxU32 nodeIndex = nodeId.index();
Dy::FeatherstoneArticulation* artic = getArticulationFromIG(islandManager.getAccurateIslandSim(), nodeId);
const PxU16 iterCount = artic->getIterationCounts();
localMaxPosIter = PxMax<PxU32>(PxU32(iterCount & 0xff), localMaxPosIter);
localMaxVelIter = PxMax<PxU32>(PxU32(iterCount >> 8), localMaxVelIter);
}
PxAtomicMax(maxPosIters, (PxI32)localMaxPosIter);
PxAtomicMax(maxVelIters, (PxI32)localMaxVelIter);
}
class PxgSetupKinematicTask : public Cm::Task
{
const PxNodeIndex* const PX_RESTRICT mKinematicNodes;
PxNodeIndex* mActiveNodeIndex; //copy island node index into this list
const PxU32 mNumBodies;
IG::SimpleIslandManager& mIslandManager;
PxU32 mSolverBodyStartIndex;
PxgSolverBodyData* mSolverBodyDataPool;
PxgSolverBodySleepData* mSolverBodySleepDataPool;
PxgSolverTxIData* mSolverTxIData;
PX_NOCOPY(PxgSetupKinematicTask)
public:
PxgSetupKinematicTask(const PxNodeIndex* const PX_RESTRICT kinematicNodes, PxNodeIndex* activeNodeIndex, const PxU32 numBodies,
IG::SimpleIslandManager& islandManager, PxU32 solverBodyStartIndex, PxgSolverBodyData* solverBodyDataPool,
PxgSolverBodySleepData* solverBodySleepDataPool, PxgSolverTxIData* txIData) : Cm::Task(0), mKinematicNodes(kinematicNodes), mActiveNodeIndex(activeNodeIndex), mNumBodies(numBodies),
mIslandManager(islandManager), mSolverBodyStartIndex(solverBodyStartIndex), mSolverBodyDataPool(solverBodyDataPool),
mSolverBodySleepDataPool(solverBodySleepDataPool), mSolverTxIData(txIData)
{
}
virtual void runInternal() PX_OVERRIDE PX_FINAL
{
IG::IslandSim& islandSim = mIslandManager.getAccurateIslandSim();
//Set up solver bodies for any kinematic bodies
for (PxU32 i = 0; i < mNumBodies; i++)
{
PxsRigidBody& rigidBody = *getRigidBodyFromIG(islandSim, mKinematicNodes[i]);
const PxsBodyCore& core = rigidBody.getCore();
copyToSolverBodyStaticAndKinematic(mSolverBodyDataPool[i], mSolverTxIData[i], core, mKinematicNodes[i]);
//mActiveNodeIndex[mSolverBodyStartIndex + i] = mKinematicNodes[i];
rigidBody.saveLastCCDTransform();
}
}
virtual const char* getName() const PX_OVERRIDE PX_FINAL
{
return "PxgKinematicSetupTask";
}
};
class PxgAtomIntegrationTask : public Cm::Task
{
const PxNodeIndex* const PX_RESTRICT mIslandNodes;
const PxU32 mNumBodies;
PxI32* mMaxPosIters;
PxI32* mMaxVelIters;
IG::SimpleIslandManager& mIslandManager;
PX_NOCOPY(PxgAtomIntegrationTask)
public:
PxgAtomIntegrationTask(const PxNodeIndex* const PX_RESTRICT islandNodes, const PxU32 numBodies, PxI32* PX_RESTRICT maxPosIters, PxI32* PX_RESTRICT maxVelIters,
IG::SimpleIslandManager& islandManager) : Cm::Task(0),
mIslandNodes(islandNodes),
mNumBodies(numBodies), mMaxPosIters(maxPosIters), mMaxVelIters(maxVelIters),
mIslandManager(islandManager)
{
}
virtual void runInternal() PX_OVERRIDE PX_FINAL
{
PX_PROFILE_ZONE("GpuDynamics.PxgIntegrateTask", 0);
PxI32 localPosIters = 0; PxI32 localVelIters = 0;
IG::IslandSim& sim = mIslandManager.getAccurateIslandSim();
for (PxU32 i = 0; i < mNumBodies; ++i)
{
const PxNodeIndex nodeId = mIslandNodes[i];
//activeNodeIndex[startIndex] = nodeId;
PxsRigidBody& rigidBody = *getRigidBodyFromIG(sim, nodeId);
localPosIters = PxMax<PxI32>(PxI32(rigidBody.mSolverIterationCounts & 0xff), localPosIters);
localVelIters = PxMax<PxI32>(PxI32(rigidBody.mSolverIterationCounts >> 8), localVelIters);
}
PxAtomicMax(mMaxPosIters, localPosIters);
PxAtomicMax(mMaxVelIters, localVelIters);
}
virtual const char* getName() const PX_OVERRIDE PX_FINAL
{
return "PxgIntegrateTask";
}
};
class PxgArticulationAtomIntegrationTask : public Cm::Task
{
const PxNodeIndex* const PX_RESTRICT mIslandNodes;
const PxU32 mNumArticulations;
PxI32* mMaxPosIters;
PxI32* mMaxVelIters;
IG::SimpleIslandManager& mIslandManager;
PX_NOCOPY(PxgArticulationAtomIntegrationTask)
public:
PxgArticulationAtomIntegrationTask(
const PxNodeIndex* const PX_RESTRICT islandNodes,
const PxU32 numArticulations, PxI32* maxPosIters,
PxI32* maxVelIters,
IG::SimpleIslandManager& islandManager
) :
Cm::Task(0), mIslandNodes(islandNodes),
mNumArticulations(numArticulations),
mMaxPosIters(maxPosIters),
mMaxVelIters(maxVelIters),
mIslandManager(islandManager)
{
}
virtual void runInternal() PX_OVERRIDE PX_FINAL
{
PX_PROFILE_ZONE("GpuDynamics.PxgArticulationAtomIntegrationTask", 0);
atomArticulationIntegration(mNumArticulations, mIslandNodes,
mIslandManager, mMaxPosIters, mMaxVelIters);
}
virtual const char* getName() const PX_OVERRIDE PX_FINAL
{
return "PxgArticulationAtomIntegrationTask";
}
};
// Pre-integration step: resets the per-frame batch counters, refreshes the cached maximum solver
// iteration counts when the state is dirty (via worker tasks), spawns the kinematic setup tasks and
// merges the iteration counts of particle systems, soft bodies and FEM cloths into the cached maxima.
void PxgGpuContext::doPreIntegrationTaskCommon(physx::PxBaseTask* continuation)
{
	// AD: this task currently assumes we only have 1 solver island. If there is a variable amount of islands,
	// the dependency chain needs to be fixed, because this task runs in parallel to allocating and setting
	// the members of mIslandContextPool. (see Pxg(TGS)DynamicsContext::update()).

	mNumContactBatches = 0;
	mNum1dConstraintBatches = 0;
	mNumArtiContactBatches = 0;
	mNumArti1dConstraintBatches = 0;
	mArtiStaticConstraintBatchOffset = 0;
	mArtiStaticContactBatchOffset = 0;

	const IG::IslandSim& accurateSim = mIslandManager.getAccurateIslandSim();

	// Between 256 and 1024 rigid bodies per task, aiming for one task per worker in between.
	const PxU32 nbWorkers = PxMax(1u, continuation->getTaskManager()->getCpuDispatcher()->getWorkerCount());
	const PxU32 rigidBodiesPerTask = PxMax(256u, PxMin(1024u, (mBodyCount + nbWorkers - 1) / nbWorkers));

	const PxNodeIndex* const PX_RESTRICT activeRigidNodes = accurateSim.getActiveNodes(IG::Node::eRIGID_BODY_TYPE);

	mGpuSolverCore->acquireContext();

	//Articulation active node indices go into the same list as mActiveNodeIndex, so they need to start in the
	//right place. The active node index list is laid out as static + kinematic + active rigid bodies + active articulations.
	const PxNodeIndex* const PX_RESTRICT activeArticulationNodes = accurateSim.getActiveNodes(IG::Node::eARTICULATION_TYPE);

	if (isStateDirty())
	{
		// The cached maxima are rebuilt from scratch; the tasks below merge into them atomically.
		mCachedPositionIterations = 0;
		mCachedVelocityIterations = 0;

		//Loop through and fill in properties from all the rigid bodies...
		for (PxU32 first = 0; first < mBodyCount; first += rigidBodiesPerTask)
		{
			void* taskMem = mFlushPool.allocate(sizeof(PxgAtomIntegrationTask));
			PxgAtomIntegrationTask* rigidTask = PX_PLACEMENT_NEW(taskMem, PxgAtomIntegrationTask)(
				activeRigidNodes + first, PxMin(rigidBodiesPerTask, mBodyCount - first),
				&mCachedPositionIterations, &mCachedVelocityIterations, mIslandManager);
			rigidTask->setContinuation(continuation);
			rigidTask->removeReference();
		}

		setStateDirty(false);

		const PxU32 articulationsPerTask = 1024u;
		for (PxU32 first = 0; first < mArticulationCount; first += articulationsPerTask)
		{
			void* taskMem = mFlushPool.allocate(sizeof(PxgArticulationAtomIntegrationTask));
			PxgArticulationAtomIntegrationTask* artiTask = PX_PLACEMENT_NEW(taskMem, PxgArticulationAtomIntegrationTask)(
				activeArticulationNodes + first,
				PxMin(articulationsPerTask, mArticulationCount - first),
				&mCachedPositionIterations, &mCachedVelocityIterations, mIslandManager);
			artiTask->setContinuation(continuation);
			artiTask->removeReference();
		}
	}

	// Kinematic setup tasks. The solver pools are addressed with an extra offset of 1.
	const PxU32 kinematicsPerTask = 1024u;
	const PxNodeIndex* const activeKinematics = accurateSim.getActiveKinematics();
	for (PxU32 first = 0; first < mKinematicCount; first += kinematicsPerTask)
	{
		const PxU32 poolOffset = first + 1;
		void* taskMem = mFlushPool.allocate(sizeof(PxgSetupKinematicTask));
		PxgSetupKinematicTask* kinematicTask = PX_PLACEMENT_NEW(taskMem, PxgSetupKinematicTask)(
			activeKinematics + first, mActiveNodeIndex.begin(), PxMin(mKinematicCount - first, kinematicsPerTask),
			mIslandManager, poolOffset,
			mSolverBodyDataPool.begin() + poolOffset,
			mSolverBodySleepDataPool.begin() + poolOffset,
			mSolverTxIDataPool.begin() + poolOffset);
		kinematicTask->setContinuation(continuation);
		kinematicTask->removeReference();
	}

	PxgSimulationController* gpuSimController = static_cast<PxgSimulationController*>(mSimulationController);
	PxgBodySimManager& bodySimManager = gpuSimController->getBodySimManager();
	void** bodySimsLL = bodySimManager.mBodies.begin();

	// Iteration counts gathered on this thread; merged into the cached maxima at the end.
	PxI32 maxPosIters = 0;
	PxI32 maxVelIters = 0;

	// Particle systems
	const PxU32 nbParticleCores = mGpuParticleSystemCores.size();
	for (PxU32 c = 0; c < nbParticleCores; ++c)
	{
		mGpuParticleSystemCores[c]->getMaxIterationCount(bodySimManager, maxPosIters, maxVelIters);
	}

	{
		//Need to implement soft body
		PxU32* softBodyNodeIndex = gpuSimController->getSoftBodyNodeIndex();
		PxU32* activeSoftbodies = bodySimManager.mActiveSoftbodies.begin();
		const PxU32 nbActiveSoftbodies = bodySimManager.mActiveSoftbodies.size();

		for (PxU32 s = 0; s < nbActiveSoftbodies; ++s)
		{
			const PxU32 nodeIdx = softBodyNodeIndex[activeSoftbodies[s]];
			Dy::DeformableVolume* softBody = reinterpret_cast<Dy::DeformableVolume*>(bodySimsLL[nodeIdx]);

			const PxU16 packedCounts = softBody->getIterationCounts();
			maxPosIters = PxMax(PxI32(packedCounts & 0xff), maxPosIters);
			maxVelIters = PxMax(PxI32(packedCounts >> 8), maxVelIters);
		}
	}

	{
		// FEM cloth: only the position iteration count is taken into account.
		PxU32* femClothNodeIndex = gpuSimController->getFEMClothNodeIndex();
		PxU32* activeFEMCloths = bodySimManager.mActiveFEMCloths.begin();
		const PxU32 nbActiveFEMCloths = bodySimManager.mActiveFEMCloths.size();

		for (PxU32 f = 0; f < nbActiveFEMCloths; ++f)
		{
			const PxU32 nodeIdx = femClothNodeIndex[activeFEMCloths[f]];
			Dy::DeformableSurface* femCloth = reinterpret_cast<Dy::DeformableSurface*>(bodySimsLL[nodeIdx]);

			const PxU16 packedCounts = femCloth->getIterationCounts();
			maxPosIters = PxMax(PxI32(packedCounts & 0xff), maxPosIters);
		}
	}

	PxAtomicMax(&mCachedPositionIterations, maxPosIters);
	PxAtomicMax(&mCachedVelocityIterations, maxVelIters);

	mGpuSolverCore->releaseContext();
}
// CPU-side constraint pre-prep. With the solver context acquired, this function:
//  - resets the four batch-index arrays and reserves capacity for them,
//  - copies the cached iteration counts into island context 0 and caches the batch counts reported
//    by the incremental partitioner,
//  - uploads the solver body data and allocates the friction patch streams,
//  - walks all partitions and spawns pre-prep tasks (rigid joints, rigid contacts, articulation
//    joints, articulation contacts), recording the batch indices each type occupies,
//  - runs the static articulation / static rigid pre-prep and releases the context.
// Every spawned task uses 'continuation' as its continuation.
void PxgGpuContext::doConstraintPrePrepCommon(physx::PxBaseTask* continuation)
{
mGpuSolverCore->acquireContext();
// Reset the batch-index arrays and reserve room for the partitioned + static (+ self, for articulations) batches.
m1dConstraintBatchIndices.forceSize_Unsafe(0);
m1dConstraintBatchIndices.reserve(mIncrementalPartition.getNbConstraintBatches() + mNumStaticRigid1dConstraintBatches);
mContactConstraintBatchIndices.forceSize_Unsafe(0);
mContactConstraintBatchIndices.reserve(mIncrementalPartition.getNbContactBatches() + mNumStaticRigidContactBatches);
mArti1dConstraintBatchIndices.forceSize_Unsafe(0);
mArti1dConstraintBatchIndices.reserve(mIncrementalPartition.getNbArtiConstraintBatches() + mNumStaticArti1dConstraintBatches + mNumSelfArti1dConstraintBatches);
mArtiContactConstraintBatchIndices.forceSize_Unsafe(0);
mArtiContactConstraintBatchIndices.reserve(mIncrementalPartition.getNbArtiContactBatches() + mNumStaticArtiContactBatches + mNumSelfArtiContactBatches);
// Only island context 0 is written here.
mIslandContextPool[0].mNumPositionIterations = mCachedPositionIterations;
mIslandContextPool[0].mNumVelocityIterations = mCachedVelocityIterations;
// Cache the batch counts reported by the incremental partitioner.
mNum1dConstraintBatches = (PxI32)mIncrementalPartition.getNbConstraintBatches();
mNumContactBatches = (PxI32)mIncrementalPartition.getNbContactBatches();
mNumArtiContactBatches = (PxI32)mIncrementalPartition.getNbArtiContactBatches();
mNumArti1dConstraintBatches = (PxI32)mIncrementalPartition.getNbArtiConstraintBatches();
PxgBodySimManager& bodyManager = getSimulationController()->getBodySimManager();
// Number of static slabs: max(static joints, static contacts) divided by mMaxNumStaticPartitions, rounded up.
const PxU32 nbStaticSlabs = (PxMax(bodyManager.mMaxStaticRBJoints, bodyManager.mMaxStaticRBContacts) + mMaxNumStaticPartitions - 1) / mMaxNumStaticPartitions;
const PxU32 maxCombinedSlabPartitions = mIncrementalPartition.getCombinedSlabMaxNbPartitions();
// The slab count passed below is ceil(nbPartitions / maxCombinedSlabPartitions), clamped to at least 1.
mGpuSolverCore->gpuMemDmaUpBodyData(mSolverBodyDataPool, mSolverTxIDataPool, mIslandManager.getNbNodeHandles() + 1,
mNumConstraintBatches, mNumArticConstraintBatches, PxMax(1u, (mIncrementalPartition.getNbPartitions() + maxCombinedSlabPartitions - 1) / maxCombinedSlabPartitions),
nbStaticSlabs, mMaxNumStaticPartitions);
//Allocate enough space for the friction patches now that we know how many we need after constraint partitioning
{
PX_PROFILE_ZONE("GpuDynamics.allocateFrictionPatchStreams", 0);
mGpuSolverCore->allocateFrictionPatchStream(mNumContactBatches + mNumStaticRigidContactBatches, mNumArtiContactBatches + mNumStaticArtiContactBatches + mNumSelfArtiContactBatches);
}
mNum1DConstraintBlockPrepPool = (PxU32)mNum1dConstraintBatches;
const PxU32 nbConstraintsPerBatch = mIsTGS ? PxgCpuConstraintPrePrepTask::NbConstraintsPerTaskTGS : PxgCpuConstraintPrePrepTask::NbConstraintsPerTaskPGS; //Each task processes up to this many constraints of a certain type (TGS and PGS use different limits)
const PxU32 nbArtiConstraintsPerBatch = mIsTGS ? PxgCpuArtiConstraintPrePrepTask::NbConstraintsPerTaskTGS : PxgCpuArtiConstraintPrePrepTask::NbConstraintsPerTaskPGS;
// Running output batch indices. The articulation ones start after the rigid 1D / rigid contact batch counts.
PxU32 constraintBatchIndex = 0;
PxU32 contactBatchIndex = 0;
PxU32 articulationConstraintBatchIndex = mNum1dConstraintBatches;
PxU32 articulationContactBatchIndex = mNumContactBatches;
// Used to round constraint counts up to whole batches of PXG_BATCH_SIZE.
const PxU32 batchMask = PXG_BATCH_SIZE - 1;
mHasForceThresholds = mIncrementalPartition.hasForceThresholds();
const PxInt32ArrayPinned& startSlabIter = mIncrementalPartition.getStartSlabPerPartition();
const PxInt32ArrayPinned& articstartSlabIter = mIncrementalPartition.getArticStartSlabPerPartition();
PxgJointManager& jointManager = static_cast<PxgSimulationController*>(mSimulationController)->getJointManager();
const PxPinnedArray<PxgConstraintPrePrep>& rigidPreprepIter = jointManager.getGpuRigidJointPrePrep();
const PxPinnedArray<PxgConstraintPrePrep>& artiPreprepIter = jointManager.getGpuArtiJointPrePrep();
//The code below iterates over all partitions, producing tasks to fill in data.
//Running indices
PxU32 startIdx = 0; //Which partition to start at
PxU32 startBatchOffset = 0; //Batch offset within the partition
PxU32 startOffset = 0; //Constraint offset within the partition
PxU32 runningContactCount = 0; //The running count of the number of contact constraints that will be processed by the next task
PxU32 runningBatchCount = 0; //The running count of the number of batches that will be processed by the next task
{
PX_PROFILE_ZONE("Process Partitions", 0);
for (PxU32 i = 0; i < mIncrementalPartition.getNbPartitions(); ++i) // this is looping over "true" partitions, not the combined ones for the solver
{
// Partition i lives in slab (i / PXG_BATCH_SIZE), at slot (i % PXG_BATCH_SIZE) within that slab.
const Partition& partition = mIncrementalPartition.getPartitionSlabs()[i / PXG_BATCH_SIZE]->mPartitions[i&(PXG_BATCH_SIZE - 1)];
const PxU32 nbContacts = partition.mPartitionIndices[PxgEdgeType::eCONTACT_MANAGER].size();
const PxU32 nbConstraints = partition.mPartitionIndices[PxgEdgeType::eCONSTRAINT].size();
const PxU32 nbArtiContacts = partition.mPartitionIndices[PxgEdgeType::eARTICULATION_CONTACT].size();
const PxU32 nbArtiConstraints = partition.mPartitionIndices[PxgEdgeType::eARTICULATION_CONSTRAINT].size();
//PxU32* constraintIds = partition.mPartitionIndices[IG::Edge::eCONSTRAINT].begin();
const PartitionIndices& constraintIds = partition.mPartitionIndices[PxgEdgeType::eCONSTRAINT];
const PartitionIndices& artiConstraintIds = partition.mPartitionIndices[PxgEdgeType::eARTICULATION_CONSTRAINT];
const PartitionIndices& artiContactIds = partition.mPartitionIndices[PxgEdgeType::eARTICULATION_CONTACT];
const PxU32 jointStartIndex = mIncrementalPartition.getJointStartIndices()[i];
// Batch-header cursors for this partition: rigid batches and articulation batches use separate header arrays.
PxU32 batchIndex = startSlabIter[i];
PxU32 localArticBatchIndex = articstartSlabIter[i];
PxU32 batchOffset = 0;
// Rigid joints: one task per chunk of up to nbConstraintsPerBatch constraints of this partition.
for (PxU32 a = 0; a < nbConstraints; a += nbConstraintsPerBatch)
{
PxU32 nbConstraintsToProcess = PxMin(nbConstraints - a, nbConstraintsPerBatch);
PxU32 nbBatches = (nbConstraintsToProcess + batchMask) / PXG_BATCH_SIZE;
PxgCpuConstraintPrePrepTask* task = (PxgCpuConstraintPrePrepTask*)mFlushPool.allocate(sizeof(PxgCpuConstraintPrePrepTask));
task = PX_PLACEMENT_NEW(task, PxgCpuConstraintPrePrepTask)(constraintIds, a, nbConstraintsToProcess,
mConstraintBatchHeaders + batchIndex, nbBatches, constraintBatchIndex, jointStartIndex + a, mConstraintUniqueIndices,
rigidPreprepIter.begin());
task->setContinuation(continuation);
task->removeReference();
// Record which batch headers hold 1D constraints.
for (PxU32 b = 0; b < nbBatches; ++b)
{
PxU32 val = batchIndex + b;
m1dConstraintBatchIndices.pushBack(val);
}
constraintBatchIndex += nbBatches;
batchIndex += nbBatches;
}
// Rigid contacts: unlike joints, contacts are accumulated across partitions (runningContactCount /
// runningBatchCount) and a task is only spawned once nbConstraintsPerBatch contacts are pending.
PxU32 remainingContacts = nbContacts;
PxU32 localOffset = 0;
//While there are constraints in this partition, process them in chunks of ~nbConstraintsPerBatch
while ((runningContactCount + remainingContacts) >= nbConstraintsPerBatch)
{
//We are aiming to process approximately 2048 constraints. However, to simplify the logic in the CPU PrePrep task,
//we actually can process a little more than that to fill up entire batches. Each batch contains 32 constraints.
PxU32 nbConstraintsFromThisPartition = nbConstraintsPerBatch - runningContactCount; //Number of constraints from this partition
PxU32 nbBatchesFromThisPartition = ((nbConstraintsFromThisPartition + batchMask) / PXG_BATCH_SIZE); //The number of batches from this partition (groups of 32 constraints)
//Round up the number of constraints from this partition to be full batches unless there are insufficient constraints in this partition to create a full batch
nbConstraintsFromThisPartition = PxMin((nbConstraintsFromThisPartition + batchMask)&(~(batchMask)), remainingContacts);
PxU32 totalBatches = runningBatchCount + nbBatchesFromThisPartition;
PxU32 nbConstraintsToProcess = runningContactCount + nbConstraintsFromThisPartition;
// The task starts at (startIdx, startOffset, startBatchOffset), i.e. where the previous contact task stopped.
PxgCpuContactPrePrepTask* task = (PxgCpuContactPrePrepTask*)mFlushPool.allocate(sizeof(PxgCpuContactPrePrepTask));
task = PX_PLACEMENT_NEW(task, PxgCpuContactPrePrepTask)(mIncrementalPartition, startIdx, startOffset, nbConstraintsToProcess,
startSlabIter.begin(), startBatchOffset, mIncrementalPartition.getContactStartIndices().begin(),
mConstraintBatchHeaders, totalBatches, contactBatchIndex, mContactUniqueIndices,
mOutputIterator, mPatchStreamAllocators[mCurrentContactStream]->mStart,
mContactStreamAllocators[mCurrentContactStream]->mStart);
task->setContinuation(continuation);
task->removeReference();
//Update contact counts
remainingContacts -= nbConstraintsFromThisPartition;
localOffset += nbConstraintsFromThisPartition;
for (PxU32 b = 0; b < nbBatchesFromThisPartition; ++b)
{
PxU32 val = batchIndex + b;
mContactConstraintBatchIndices.pushBack(val);
}
//Update iteration indices in this partition
contactBatchIndex += totalBatches;
batchIndex += nbBatchesFromThisPartition;
batchOffset += nbBatchesFromThisPartition;
//Update global task iteration indices
startIdx = i;
startOffset = localOffset;
startBatchOffset = batchOffset;
runningContactCount = 0;
runningBatchCount = 0;
}
//We have remaining constraints. If so, sum them up and continue iterating...
PxU32 remainingBatches = (remainingContacts + batchMask) / PXG_BATCH_SIZE;
runningContactCount += remainingContacts;
runningBatchCount += remainingBatches;
// The leftover batches are recorded now even though the task covering them is spawned later.
for (PxU32 b = 0; b < remainingBatches; ++b)
{
PxU32 val = batchIndex + b;
mContactConstraintBatchIndices.pushBack(val);
}
//batchIndex += runningBatchCount;
PxU32 localArtiJointStartIndex = mIncrementalPartition.getArtiJointStartIndices()[i];
//constraintBatchIndex += contactBatchIndex;
//articulation constraints
for (PxU32 a = 0; a < nbArtiConstraints; a += nbArtiConstraintsPerBatch)
{
//each constraint is a batch
PxU32 nbConstraintsToProcess = PxMin(nbArtiConstraints - a, nbArtiConstraintsPerBatch);
PxU32 nbBatchesFromThisPartition = ((nbConstraintsToProcess + batchMask) / PXG_BATCH_SIZE); //The number of batches from this partition (groups of 32 constraints)
// The trailing 'false' argument distinguishes this joint task from the contact task below, which passes 'true'.
PxgCpuArtiConstraintPrePrepTask* task = (PxgCpuArtiConstraintPrePrepTask*)mFlushPool.allocate(sizeof(PxgCpuArtiConstraintPrePrepTask));
task = PX_PLACEMENT_NEW(task, PxgCpuArtiConstraintPrePrepTask)(artiConstraintIds, a, nbConstraintsToProcess,
mArticConstraintBatchHeaders + localArticBatchIndex, nbBatchesFromThisPartition, articulationConstraintBatchIndex, localArtiJointStartIndex, mArtiConstraintUniqueIndices,
artiPreprepIter.begin(), false);
localArtiJointStartIndex += nbConstraintsToProcess;
task->setContinuation(continuation);
task->removeReference();
for (PxU32 b = 0; b < nbBatchesFromThisPartition; ++b)
{
PxU32 val = localArticBatchIndex + b;
mArti1dConstraintBatchIndices.pushBack(val);
}
articulationConstraintBatchIndex += nbBatchesFromThisPartition;
localArticBatchIndex += nbBatchesFromThisPartition;
}
PxU32 localArtiContactStartIndex = mIncrementalPartition.getArtiContactStartIndices()[i];
//articulation contacts
// These continue in the articulation batch headers right after this partition's articulation joints.
for (PxU32 a = 0; a < nbArtiContacts; a += nbArtiConstraintsPerBatch)
{
//each contact is a batch
PxU32 nbContactsToProcess = PxMin(nbArtiContacts - a, nbArtiConstraintsPerBatch);
PxU32 nbBatchesFromThisPartition = ((nbContactsToProcess + batchMask) / PXG_BATCH_SIZE); //The number of batches from this partition (groups of 32 constraints)
PxgCpuArtiConstraintPrePrepTask* task = (PxgCpuArtiConstraintPrePrepTask*)mFlushPool.allocate(sizeof(PxgCpuArtiConstraintPrePrepTask));
task = PX_PLACEMENT_NEW(task, PxgCpuArtiConstraintPrePrepTask)(artiContactIds, a, nbContactsToProcess,
mArticConstraintBatchHeaders + localArticBatchIndex, nbBatchesFromThisPartition, articulationContactBatchIndex, localArtiContactStartIndex, mArtiContactUniqueIndices,
artiPreprepIter.begin(), true);
localArtiContactStartIndex += nbContactsToProcess;
task->setContinuation(continuation);
task->removeReference();
for (PxU32 b = 0; b < nbBatchesFromThisPartition; ++b)
{
PxU32 val = localArticBatchIndex + b;
mArtiContactConstraintBatchIndices.pushBack(val);
}
articulationContactBatchIndex += nbBatchesFromThisPartition;
localArticBatchIndex += nbBatchesFromThisPartition;
}
}
// Flush the rigid contacts that were accumulated but never reached the per-task threshold.
if (runningBatchCount > 0)
{
//There are remaining unprocessed contact constraints
PxgCpuContactPrePrepTask* task = (PxgCpuContactPrePrepTask*)mFlushPool.allocate(sizeof(PxgCpuContactPrePrepTask));
task = PX_PLACEMENT_NEW(task, PxgCpuContactPrePrepTask)(mIncrementalPartition, startIdx, startOffset, runningContactCount,
startSlabIter.begin(), startBatchOffset, mIncrementalPartition.getContactStartIndices().begin(),
mConstraintBatchHeaders, runningBatchCount, contactBatchIndex, mContactUniqueIndices,
mOutputIterator, mPatchStreamAllocators[mCurrentContactStream]->mStart,
mContactStreamAllocators[mCurrentContactStream]->mStart);
task->setContinuation(continuation);
task->removeReference();
}
}
// The static articulation pre-prep receives the articulation batch indices reached above.
doStaticArticulationConstraintPrePrep(continuation, articulationConstraintBatchIndex, articulationContactBatchIndex);
doStaticRigidConstraintPrePrep(continuation);
mGpuSolverCore->releaseContext();
}
// Fills the pre-prep, constant and partition descriptors from the context's current state, uploads them
// through mGpuSolverCore->gpuMemDMAUp() and then launches the parallel constraint pre-prep.
// \param hasForceThresholds	Forwarded unchanged as the last argument of gpuMemDMAUp().
void PxgGpuContext::doConstraintPrePrepGPUCommon(bool hasForceThresholds)
{
mLostTouchTask->removeReference();
const PxU32 nbCombinedSlabPartitions = mIncrementalPartition.getCombinedSlabNbPartitions();
// Rebuild the per-partition accumulated counts (rigid and articulation), one entry per combined slab partition.
// The arrays are emptied first; when the capacity is too small it is grown to twice the required size.
{
mConstraintsPerPartition.forceSize_Unsafe(0);
if (mConstraintsPerPartition.capacity() < nbCombinedSlabPartitions)
mConstraintsPerPartition.reserve(2 * nbCombinedSlabPartitions);
mArtiConstraintsPerPartition.forceSize_Unsafe(0);
if (mArtiConstraintsPerPartition.capacity() < nbCombinedSlabPartitions)
mArtiConstraintsPerPartition.reserve(2 * nbCombinedSlabPartitions);
for (PxU32 a = 0; a < nbCombinedSlabPartitions; ++a)
{
mConstraintsPerPartition.pushBack(mIncrementalPartition.getCSlabAccumulatedPartitionCount(a));
mArtiConstraintsPerPartition.pushBack(mIncrementalPartition.getCSlabAccumulatedArtiPartitionCount(a));
}
}
// The island context spans every partition and every batch, all starting at index 0.
mIslandContextPool->mStartPartitionIndex = 0;
mIslandContextPool->mNumPartitions = nbCombinedSlabPartitions;
mIslandContextPool->mBatchStartIndex = 0;
mIslandContextPool->mBatchCount = mIncrementalPartition.getNbConstraintBatches() + mIncrementalPartition.getNbContactBatches();
mIslandContextPool->mArtiBatchStartIndex = 0;
mIslandContextPool->mArtiBatchCount = mIncrementalPartition.getNbArtiConstraintBatches() + mIncrementalPartition.getNbArtiContactBatches();
//mIslandContextPool->mStaticArtiBatchCount = getSimulationController()->getBodySimManager().mTotalArticJoints + getSimulationController()->getBodySimManager().mTotalArticContacts;
// Joint counts come from the joint manager, split into GPU-side and CPU-side joints for rigid bodies and articulations.
PxgJointManager& jointManager = getSimulationController()->getJointManager();
const PxU32 gpuRigidJointSize = jointManager.getGpuNbRigidConstraints();
const PxU32 cpuRigidJointSize = jointManager.getCpuNbRigidConstraints();
const PxU32 gpuArtiJointSize = jointManager.getGpuNbArtiConstraints();
const PxU32 cpuArtiJointSize = jointManager.getCpuNbArtiConstraints();
// Pre-prep descriptor: joint counts, batch counts, unique-index buffers and batch-index arrays.
PxgConstraintPrePrepData ppData;
ppData.nbGpuRigidJoints = gpuRigidJointSize;
ppData.nbTotalRigidJoints = gpuRigidJointSize + cpuRigidJointSize;
ppData.nbGpuArtiJoints = gpuArtiJointSize;
ppData.nbTotalArtiJoints = gpuArtiJointSize + cpuArtiJointSize;
ppData.numContactBatches = PxU32(mNumContactBatches);
ppData.num1dConstraintBatches = PxU32(mNum1dConstraintBatches);
ppData.numStaticContactBatches = PxU32(mNumStaticRigidContactBatches);
ppData.numStatic1dConstraintBatches = PxU32(mNumStaticRigid1dConstraintBatches);
ppData.numArtiContactsBatches = PxU32(mNumArtiContactBatches);
ppData.numArti1dConstraintBatches = PxU32(mNumArti1dConstraintBatches);
ppData.numArtiStaticContactsBatches = PxU32(mNumStaticArtiContactBatches);
ppData.numArtiStatic1dConstraintBatches = PxU32(mNumStaticArti1dConstraintBatches);
ppData.numArtiSelfContactsBatches = PxU32(mNumSelfArtiContactBatches);
ppData.numArtiSelf1dConstraintBatches = PxU32(mNumSelfArti1dConstraintBatches);
ppData.artiStaticConstraintBatchOffset = PxU32(mArtiStaticConstraintBatchOffset);
ppData.artiStaticContactBatchOffset = PxU32(mArtiStaticContactBatchOffset);
ppData.contactUniqueIndices = mContactUniqueIndices;
ppData.constraintUniqueIndices = mConstraintUniqueIndices;
ppData.artiContactUniqueIndices = mArtiContactUniqueIndices;
ppData.artiConstraintUniqueindices = mArtiConstraintUniqueIndices;
ppData.artiStaticConstraintUniqueIndices = mArtiStaticConstraintUniqueIndices;
ppData.artiStaticContactUniqueIndices = mArtiStaticContactUniqueIndices;
ppData.artiStaticConstraintStartIndex = mArtiStaticConstraintStartIndex;
ppData.artiStaticConstraintCount = mArtiStaticConstraintCount;
ppData.artiStaticContactStartIndex = mArtiStaticContactStartIndex;
ppData.artiStaticContactCount = mArtiStaticContactCount;
ppData.constraint1DBatchIndices = m1dConstraintBatchIndices.begin();
ppData.constraintContactBatchIndices = mContactConstraintBatchIndices.begin();
ppData.artiConstraint1dBatchindices = mArti1dConstraintBatchIndices.begin();
ppData.artiConstraintContactBatchIndices = mArtiContactConstraintBatchIndices.begin();
// Constant descriptor: time step, thresholds, bias coefficient and gravity, copied from the context.
PxgConstantData cData;
cData.dt = mDt;
cData.invDtF32 = mInvDt;
cData.bounceThresholdF32 = mBounceThreshold;
cData.frictionOffsetThreshold = mFrictionOffsetThreshold;
cData.correlationDistance = mCorrelationDistance;
cData.ccdMaxSeparation = mCCDSeparationThreshold;
cData.biasCoefficient = mIslandContextPool->mBiasCoefficient;
cData.gravity = mGravity;
// Partition descriptor: the per-partition count arrays built above plus totals taken from the incremental
// partitioner (dynamic pairs) and from the body sim manager (static and self pairs).
PxgBodySimManager& bodySimManager = getSimulationController()->getBodySimManager();
PxgPartitionData pData;
pData.constraintsPerPartition = mConstraintsPerPartition.begin();
pData.numConstraintsPerPartition = mConstraintsPerPartition.size();
pData.artiConstraintsPerPartition = mArtiConstraintsPerPartition.begin();
pData.numArtiConstraintsPerPartition = mArtiConstraintsPerPartition.size();
pData.numTotalContacts = mIncrementalPartition.getTotalContacts();
pData.numTotalStaticConstraints = bodySimManager.mTotalStaticRBJoints;
pData.numTotalStaticContacts = bodySimManager.mTotalStaticRBContacts;
pData.numTotalConstraints = mIncrementalPartition.getTotalConstraints();
pData.numTotalArtiContacts = mIncrementalPartition.getTotalArticulationContacts();
pData.numTotalArtiConstraints = mIncrementalPartition.getTotalArticulationConstraints();
pData.numTotalArtiStaticContacts = bodySimManager.mTotalStaticArticContacts;
pData.numTotalArtiStaticConstraints = bodySimManager.mTotalStaticArticJoints;
pData.numTotalArtiSelfContacts = bodySimManager.mTotalSelfArticContacts;
pData.numTotalArtiSelfConstraints = bodySimManager.mTotalSelfArticJoints;
pData.artiStaticConstraintBatchOffset = mArtiStaticConstraintBatchOffset;
pData.artiStaticContactBatchOffset = mArtiStaticContactBatchOffset;
// Static/self batch counts are mirrored onto the island context.
mIslandContextPool->mStaticArtiBatchCount = mNumArtiStaticConstraintBatches;
mIslandContextPool->mSelfArtiBatchCount = mNumArtiSelfConstraintBatches;
mIslandContextPool->mStaticRigidBatchCount = mNumRigidStaticConstraintBatches;
// nbSlabs = ceil(nbPartitions / maxCombinedSlabPartitions), never less than 1;
// nbPartitions is clamped to the combined-slab maximum.
const PxU32 maxCombinedSlabPartitions = mIncrementalPartition.getCombinedSlabMaxNbPartitions();
const PxU32 nbSlabs = PxMax(1u, (mIncrementalPartition.getNbPartitions() + maxCombinedSlabPartitions - 1) / maxCombinedSlabPartitions);
const PxU32 nbPartitions = PxMin(mIncrementalPartition.getNbPartitions(), maxCombinedSlabPartitions);
mGpuArticulationCore->allocDeltaVBuffer(nbSlabs, nbPartitions, mGpuSolverCore->getStream());
// Hand everything to the solver core in one call. The two count pairs scale the rigid and the articulation
// contact batch totals by PXG_MAX_NUM_POINTS_PER_CONTACT_PATCH and by 4.
// NOTE(review): presumably these are per-batch buffer capacities - confirm against gpuMemDMAUp().
mGpuSolverCore->gpuMemDMAUp(*mPinnedMemoryAllocator, ppData, mSolverBodyPool.size(),
mConstraintBatchHeaders, mIslandContextPool, mNumIslandContextPool, pData,
mNumConstraintBatches, mNumRigidStaticConstraintBatches, mNumArticConstraintBatches, mNumArtiStaticConstraintBatches, mNumArtiSelfConstraintBatches, cData,
PXG_MAX_NUM_POINTS_PER_CONTACT_PATCH * (mNumContactBatches + mNumStaticRigidContactBatches), 4u * (mNumContactBatches + mNumStaticRigidContactBatches),
PXG_MAX_NUM_POINTS_PER_CONTACT_PATCH * (mNumArtiContactBatches + mNumStaticArtiContactBatches + mNumSelfArtiContactBatches), 4u * (mNumArtiContactBatches + mNumStaticArtiContactBatches + mNumSelfArtiContactBatches),
mTotalEdges, mTotalPreviousEdges,
nbSlabs,
maxCombinedSlabPartitions, mEnableStabilization, mPatchStreamAllocators[mCurrentContactStream]->mStart, mContactStreamAllocators[mCurrentContactStream]->mStart,
mForceStreamAllocator->mStart, mOutputIterator, mSolverBodyPool.size() - (mKinematicCount + 1), mKinematicCount + 1, mArticulationCount,
reinterpret_cast<Cm::UnAlignedSpatialVector*>(mGpuArticulationCore->getDeferredZ()),
reinterpret_cast<PxU32*>(mGpuArticulationCore->getArticulationDirty()),
reinterpret_cast<uint4*>(mGpuArticulationCore->getArticulationSlabMask()),
mGPUShapeInteractions, mGPURestDistances, mGPUTorsionalData, mArtiStaticContactIndices.begin(), mArtiStaticContactIndices.size(),
mArtiStaticJointIndices.begin(), mArtiStaticJointIndices.size(), mArtiStaticContactCounts.begin(), mArtiStaticJointCounts.begin(),
mArtiSelfContactIndices.begin(), mArtiSelfContactIndices.size(),
mArtiSelfJointIndices.begin(), mArtiSelfJointIndices.size(), mArtiSelfContactCounts.begin(), mArtiSelfJointCounts.begin(),
mRigidStaticContactIndices.begin(), mRigidStaticContactIndices.size(), mRigidStaticJointIndices.begin(), mRigidStaticJointIndices.size(),
mRigidStaticContactCounts.begin(), mRigidStaticJointCounts.begin(), mLengthScale, hasForceThresholds);
//Make sure that the GPU articulation work has completed now...
mGpuArticulationCore->syncUnconstrainedVelocities();
mGpuArticulationCore->layoutDeltaVBuffer(nbSlabs, nbPartitions, mGpuSolverCore->getStream());
mGpuArticulationCore->createStaticContactAndConstraintsBatch(mArticulationCount);
// Launch pre-prep over the sum of all five batch categories; the second argument is the number of GPU joints
// (rigid + articulation).
mGpuSolverCore->constraintPrePrepParallel(mNumConstraintBatches + mNumRigidStaticConstraintBatches + mNumArticConstraintBatches + mNumArtiStaticConstraintBatches + mNumArtiSelfConstraintBatches, gpuRigidJointSize + gpuArtiJointSize,
mIslandContextPool->mBodyCount);
}
// Runs the CPU solver-prep shader for the joints in [mStartIndex, mStartIndex + mNbToProcess) and writes, for
// each joint, a PxgConstraintData entry plus its 1D constraint rows into the shared row buffer.
//
// For every joint:
//  - the shader is invoked on a zeroed scratch row array (impulse limits preset to +/-PX_MAX_REAL), unless the
//    joint carries PxConstraintFlag::eDISABLE_CONSTRAINT, in which case it produces zero rows;
//  - the row count is always written to data.mNumRows_Flags_StartIndex.x. This includes the case where the joint
//    has no solver-prep shader: that case now publishes 0 rows instead of leaving the slot's previous content
//    untouched;
//  - when rows were produced, a contiguous range is claimed in mConstraintRows with an atomic add on mRowCounts
//    and the scratch rows are copied there.
void PxgCpuJointPrePrepTask::runInternal()
{
	const PxU32 endIndex = mStartIndex + mNbToProcess;

	// Scratch rows, reused for every joint handled by this task.
	Px1DConstraint tempRows[Dy::MAX_CONSTRAINT_ROWS];

	for (PxU32 i = mStartIndex; i < endIndex; ++i)
	{
		const Dy::Constraint* constraint = mConstraints[i];
		PxgConstraintData& data = mConstraintData[i];

		const PxConstraintSolverPrep solverPrep = constraint->solverPrep;
		if (!solverPrep)
		{
			// No shader means no rows. Write the zero explicitly so the slot never exposes a stale row count.
			data.mNumRows_Flags_StartIndex.x = 0;
			continue;
		}

		// A missing body is treated as being at the identity pose.
		const PxTransform& pose0 = (constraint->body0 ? constraint->body0->getPose() : PxTransform(PxIdentity));
		const PxTransform& pose1 = (constraint->body1 ? constraint->body1->getPose() : PxTransform(PxIdentity));

		const void* constantBlock = constraint->constantBlock;

		PxMemZero(tempRows, sizeof(Px1DConstraint)*Dy::MAX_CONSTRAINT_ROWS);
		for (PxU32 j = 0; j < Dy::MAX_CONSTRAINT_ROWS; j++)
		{
			Px1DConstraint& c = tempRows[j];
			c.minImpulse = -PX_MAX_REAL;
			c.maxImpulse = PX_MAX_REAL;
		}

		PxConstraintInvMassScale ims(1.0f, 1.0f, 1.0f, 1.0f);
		PxVec3p ra, rb;
		PxVec3p body0WorldOffset(0.0f);

		//TAG:solverprepcall
		const PxU32 numRows = (constraint->flags & PxConstraintFlag::eDISABLE_CONSTRAINT) ? 0 :(*solverPrep)(tempRows,
			body0WorldOffset,
			Dy::MAX_CONSTRAINT_ROWS,
			ims,
			constantBlock,
			pose0, pose1, !!(constraint->flags & PxConstraintFlag::eENABLE_EXTENDED_LIMITS), ra, rb);

		data.mNumRows_Flags_StartIndex.x = numRows;
		if (numRows == 0)
			continue;

		// Store ra/rb relative to the respective body positions.
		ra -= pose0.p;
		rb -= pose1.p;

		data.mInvMassScale.linear0 = ims.linear0;
		data.mInvMassScale.angular0 = ims.angular0;
		data.mInvMassScale.linear1 = ims.linear1;
		data.mInvMassScale.angular1 = ims.angular1;

		// The break forces ride in the w components.
		data.mRaWorld_linBreakForceW = make_float4(ra.x, ra.y, ra.z, constraint->linBreakForce);
		data.mRbWorld_angBreakForceW = make_float4(rb.x, rb.y, rb.z, constraint->angBreakForce);
		data.mNumRows_Flags_StartIndex.y = constraint->flags;

		// Claim numRows consecutive slots in the shared row buffer: the add result minus numRows is used as
		// the first claimed slot.
		PxI32 startRowIndex = PxAtomicAdd(mRowCounts, PxI32(numRows)) - PxI32(numRows);
		PxMemCopy(mConstraintRows + startRowIndex, tempRows, sizeof(Px1DConstraint) * numRows);

		// The published start index is offset by mGpuJointOffset.
		data.mNumRows_Flags_StartIndex.z = mGpuJointOffset + startRowIndex;
	}
}
// Splits the CPU-side rigid and articulation joints into chunks of at most 128 and spawns one
// PxgCpuJointPrePrepTask per chunk from the flush pool. Every spawned task gets 'continuation' as its
// continuation and is released immediately.
void PxgGpuContext::cpuJointPrePrepTask(physx::PxBaseTask* continuation)
{
	PxgJointManager& jointManager = getSimulationController()->getJointManager();

	// AD: This could also be skipped with direct-GPU API, but at this point the constraints are already partitioned and I
	// cannot figure out how to remove the CPU joints from there again.
	const PxArray<const Dy::Constraint*>& cpuRigidConstraints = jointManager.getCpuRigidConstraints();
	const PxArray<const Dy::Constraint*>& cpuArtiConstraints = jointManager.getCpuArtiConstraints();

	const PxU32 rigidCount = cpuRigidConstraints.size();
	const PxU32 artiCount = cpuArtiConstraints.size();

	const PxU32 nbJointsPerTask = 128u;	// PT: TODO: revisit

	// Rigid-body joints: the offset passed to each task is the GPU rigid joint count times the max rows per joint.
	const PxU32 rigidRowOffset = jointManager.getGpuNbRigidConstraints() * Dy::MAX_CONSTRAINT_ROWS;
	PxU32 first = 0;
	while (first < rigidCount)
	{
		const PxU32 chunk = PxMin(rigidCount - first, nbJointsPerTask);

		void* storage = mFlushPool.allocate(sizeof(PxgCpuJointPrePrepTask));
		PxgCpuJointPrePrepTask* rigidTask = PX_PLACEMENT_NEW(storage, PxgCpuJointPrePrepTask)(
			*getSimulationController(), first, chunk, rigidRowOffset,
			cpuRigidConstraints.begin(),
			jointManager.getCpuRigidConstraintData().begin(),
			jointManager.getCpuRigidConstraintRows().begin(),
			&jointManager.mNbCpuRigidConstraintRows);

		rigidTask->setContinuation(continuation);
		rigidTask->removeReference();

		first += nbJointsPerTask;
	}

	// Articulation joints: same scheme, using the articulation buffers and row counter.
	const PxU32 artiRowOffset = jointManager.getGpuNbArtiConstraints() * Dy::MAX_CONSTRAINT_ROWS;
	first = 0;
	while (first < artiCount)
	{
		const PxU32 chunk = PxMin(artiCount - first, nbJointsPerTask);

		void* storage = mFlushPool.allocate(sizeof(PxgCpuJointPrePrepTask));
		PxgCpuJointPrePrepTask* artiTask = PX_PLACEMENT_NEW(storage, PxgCpuJointPrePrepTask)(
			*getSimulationController(), first, chunk, artiRowOffset,
			cpuArtiConstraints.begin(),
			jointManager.getCpuArtiConstraintData().begin(),
			jointManager.getCpuArtiConstraintRows().begin(),
			&jointManager.mNbCpuArtiConstraintRows);

		artiTask->setContinuation(continuation);
		artiTask->removeReference();

		first += nbJointsPerTask;
	}
}
// This class figures out the max iteration counts for all actors,
// and prepares some data for kinematics.
// The task body itself only forwards to the owning context's doPreIntegrationTaskCommon(), passing mCont.
void PxgCpuPreIntegrationTask::runInternal()
{
mContext.doPreIntegrationTaskCommon(mCont);
}
void PxgCpuContactPrePrepTask::runInternal()
{
PX_PROFILE_ZONE("GpuDynamics.PxgCpuContactPrePrepTask", 0);
const PxU32 nbToProcess = mNumBatches;
PxU32 nbProcessed = 0;
PxU32 partitionIdx = mPartitionIndex;
PxU32 partitionStartIdx = mStartIndexWithinPartition;
PxU32 startSlabOffset = mStartSlabOffset;
PxU32 workUnitIndex = mWorkUnitStartIndex;
while (nbProcessed < nbToProcess)
{
//Extract current partition
const Partition& partition = mPartition.getPartitionSlabs()[partitionIdx / PXG_BATCH_SIZE]->mPartitions[partitionIdx&(PXG_BATCH_SIZE - 1)];
//Get edgeIndices corresponding to this partition offset by partitionStartIdx
const PartitionIndices& edgeIds = partition.mPartitionIndices[PxgEdgeType::eCONTACT_MANAGER];// +partitionStartIdx;
//Factor in joint constraints to work out offsets in this partition. As this task can now process multiple partitions,
//it is easiest just to compute them again here
{
const PxU32 nbConstraints = partition.mPartitionIndices[PxgEdgeType::eCONSTRAINT].size();
const PxU32 nbBatches = (nbConstraints + 31u) / PXG_BATCH_SIZE;
startSlabOffset += nbBatches;
}
const PxU32 batchIndex = mStartSlabIter[partitionIdx] + startSlabOffset;
const PxU32 uniqueStartIndex = mContactStartIndices[partitionIdx] + partitionStartIdx;
//The number we process in this partition is equal to the smaller of (nbToProcess - nbProcessed) and (size of partition - startOffsetInPartition).
const PxU32 nbRemaining = partition.mPartitionIndices[PxgEdgeType::eCONTACT_MANAGER].size() - partitionStartIdx;
//Convert from constraints to batches
const PxU32 nbBatchesToProcess = PxMin((nbToProcess - nbProcessed), (nbRemaining + 31) / PXG_BATCH_SIZE);
PxU32 currentEdgeIndex = 0;
for (PxU32 a = 0; a < nbBatchesToProcess; ++a)
{
const PxU32 descStride = PxMin(nbRemaining - currentEdgeIndex, PXG_BATCH_SIZE);
PxgConstraintBatchHeader& batchHeader = mBatchHeaders[a + batchIndex];
batchHeader.constraintType = PxgSolverConstraintDesc::eCONTACT;
batchHeader.mDescStride = PxU16(descStride);
batchHeader.mConstraintBatchIndex = workUnitIndex++;
batchHeader.mStartPartitionIndex = uniqueStartIndex + a * PXG_BATCH_SIZE;
batchHeader.mask = 0xFFFFFFFF; //Unused
#if PXG_CONTACT_VALIDATION
validateContactPairs(a, a + descStride, edgeIds + a, mNpIds, mOutputIterator, mBaseContactPatch, mBaseContactPointer);
#endif
currentEdgeIndex += descStride;
}
for (PxU32 i = 0; i < nbRemaining; ++i)
{
const PxU32 uniqueId = edgeIds[i + partitionStartIdx];
mPinnedEdgeIds[uniqueStartIndex + i] = uniqueId;
}
nbProcessed += nbBatchesToProcess;
partitionIdx++;
partitionStartIdx = 0;
startSlabOffset = 0;
//PxMemCopy(mPinnedEdgeIds + uniqueStartIndex, edgeIds, sizeof(PxU32) * nbRemaining);
}
}
// Sizes and carves up the temporary pinned memory used by the GPU solver for this step.
//
// Two stages:
//  1. Accumulate the number of bytes needed (island context, constraint batch headers, unique indices,
//     per-articulation and per-body static constraint tables, solver-core descriptors). Each sub-allocation
//     is padded by one alignment unit (128 bytes).
//  2. Reserve that amount in mPinnedMemoryAllocator, then sub-allocate and set the member pointers.
//
// Side effects: updates the mNum*Batches members (batch-count upper bounds) and all pointers listed in stage 2.
void PxgGpuContext::allocateTempPinnedSolverMemoryCommon()
{
	// AD: two stages.
	// 1. first figure out how much we need. Allocate PxMax(sizeNeeded, PxGpuDynamicsMemoryConfig::tempBufferCapacity).
	// 2. suballocate and set the pointers.

	// AD: old comment that moved here when outlining into a separate function. I don't know how relevant this still is.
	// KS - this may be over-allocating because, at this stage, we only know (1) how many articulation static contacts
	// we have in total, (2) how many is the max a given articulation has and (3) how many articulations we have.
	// We allocate the minimum of maxBatches * numArticulations, totalContacts. We will likely require less than
	// both of these counts, but this provides us with an upper-bound...

	// this code operates under the assumption that we only have 1 solver island on GPU.

	PxU64 sizeNeeded = 0;
	const PxU32 alignment = 128; // GPU cache line size.

	const PxU32 totalIslands = 1;
	const PxU64 totalIslandsAllocationSize = (totalIslands * sizeof(PxgIslandContext)) + alignment;
	sizeNeeded += totalIslandsAllocationSize;

	mNumConstraintBatches = mIncrementalPartition.getNbConstraintBatches() + mIncrementalPartition.getNbContactBatches();

	PxgBodySimManager& bodyManager = getSimulationController()->getBodySimManager();

	// Rigid-vs-static batches: upper bound is min(max per body * number of body batches, total).
	const PxU32 maxStaticRigidJoints = bodyManager.mMaxStaticRBJoints;
	const PxU32 maxStaticRigidContacts = bodyManager.mMaxStaticRBContacts;
	const PxU32 nbRigidBatches = (mBodyCount + PXG_BATCH_SIZE - 1) / PXG_BATCH_SIZE;
	const PxU32 totalStaticRigidContacts = bodyManager.mTotalStaticRBContacts;
	const PxU32 totalStaticRigidJoints = bodyManager.mTotalStaticRBJoints;
	mNumStaticRigidContactBatches = PxMin(maxStaticRigidContacts * nbRigidBatches, totalStaticRigidContacts);
	mNumStaticRigid1dConstraintBatches = PxMin(maxStaticRigidJoints * nbRigidBatches, totalStaticRigidJoints);
	mNumRigidStaticConstraintBatches = (mNumStaticRigidContactBatches + mNumStaticRigid1dConstraintBatches);

	mNumArticConstraintBatches = mIncrementalPartition.getNbArtiConstraintBatches() + mIncrementalPartition.getNbArtiContactBatches();

	// Articulation-vs-static batches: same bound, using the number of articulation batches.
	const PxU32 nbArticBatches = (mArticulationCount + PXG_BATCH_SIZE - 1) / PXG_BATCH_SIZE;
	const PxU32 maxStaticArticJoints = bodyManager.mMaxStaticArticJoints;
	const PxU32 maxStaticArticContacts = bodyManager.mMaxStaticArticContacts;
	const PxU32 totalStaticArticulationContacts = bodyManager.mTotalStaticArticContacts;
	const PxU32 totalStaticArticulationJoints = bodyManager.mTotalStaticArticJoints;
	mNumStaticArtiContactBatches = PxMin(maxStaticArticContacts * nbArticBatches, totalStaticArticulationContacts);
	mNumStaticArti1dConstraintBatches = PxMin(maxStaticArticJoints * nbArticBatches, totalStaticArticulationJoints);
	mNumArtiStaticConstraintBatches = (mNumStaticArtiContactBatches + mNumStaticArti1dConstraintBatches);

	// Articulation self-interaction batches: same bound again.
	const PxU32 maxSelfArticJoints = bodyManager.mMaxSelfArticJoints;
	const PxU32 maxSelfArticContacts = bodyManager.mMaxSelfArticContacts;
	const PxU32 totalSelfArticulationContacts = bodyManager.mTotalSelfArticContacts;
	const PxU32 totalSelfArticulationJoints = bodyManager.mTotalSelfArticJoints;
	mNumSelfArtiContactBatches = PxMin(maxSelfArticContacts * nbArticBatches, totalSelfArticulationContacts);
	mNumSelfArti1dConstraintBatches = PxMin(maxSelfArticJoints * nbArticBatches, totalSelfArticulationJoints);
	mNumArtiSelfConstraintBatches = (mNumSelfArtiContactBatches + mNumSelfArti1dConstraintBatches);

	// One header per batch across all five batch categories.
	const PxU64 allocationSizeConstraintBatchHeader = sizeof(PxgConstraintBatchHeader) * (mNumConstraintBatches + mNumRigidStaticConstraintBatches + mNumArticConstraintBatches + mNumArtiStaticConstraintBatches + mNumArtiSelfConstraintBatches);
	const PxU64 allocationSizeConstraintBatchHeaderAligned = allocationSizeConstraintBatchHeader + alignment;
	sizeNeeded += allocationSizeConstraintBatchHeaderAligned;

	const PxU32 totalJoints = mIncrementalPartition.getTotalConstraints();
	const PxU32 totalContacts = mIncrementalPartition.getTotalContacts();
	const PxU32 totalArticulationJoints = mIncrementalPartition.getTotalArticulationConstraints();
	const PxU32 totalArticulationContacts = mIncrementalPartition.getTotalArticulationContacts();

	//Unique indices layout (matches the pointer carving below):
	//joint->rigidStaticJoint->artiJoint->artiStaticJoint->artiSelfJoint->
	//contact->rigidStaticContact->artiContact->artiStaticContact->artiSelfContact
	const PxU64 allocationSizeUniqueIndices = (totalJoints + totalContacts + totalArticulationJoints
		+ totalArticulationContacts + totalStaticArticulationJoints + totalStaticArticulationContacts + totalSelfArticulationContacts
		+ totalSelfArticulationJoints + totalStaticRigidContacts + totalStaticRigidJoints) * sizeof(PxU32);
	const PxU64 allocationSizeUniqueIndicesAligned = allocationSizeUniqueIndices + alignment;
	sizeNeeded += allocationSizeUniqueIndicesAligned;

	// Four PxU32 tables per articulation (static constraint start/count, static contact start/count).
	const PxU64 allocationSizeArticulationCount = mArticulationCount * 4 * sizeof(PxU32);
	const PxU64 allocationSizeArticulationCountAligned = allocationSizeArticulationCount + alignment;
	sizeNeeded += allocationSizeArticulationCountAligned;

	// Two PxU32 tables per rigid body (static constraint start/count).
	const PxU64 allocationSizeBodyCount = mBodyCount * 2 * sizeof(PxU32);
	const PxU64 allocationSizeBodyCountAligned = allocationSizeBodyCount + alignment;
	sizeNeeded += allocationSizeBodyCountAligned;

	// descriptors are part of the solvercore
	sizeNeeded += mGpuSolverCore->getDescriptorsAllocationSize();

	// phase 2 - actually allocate the memory

	// The size is accumulated in 64 bits but handed over as 32 bits. Catch a silent wrap-around in checked
	// builds: a truncated reservation would make the sub-allocations below run past the reserved block.
	PX_ASSERT(sizeNeeded <= PxU64(0xffffffffu));
	mPinnedMemoryAllocator->reserveAndGrow(static_cast<PxU32>(sizeNeeded));

#if PX_ENABLE_SIM_STATS
	mSimStats.mGpuDynamicsTempBufferCapacity = PxMax(sizeNeeded,mSimStats.mGpuDynamicsTempBufferCapacity);
#else
	PX_CATCH_UNDEFINED_ENABLE_SIM_STATS
#endif

	mIslandContextPool = reinterpret_cast<PxgIslandContext*>(mPinnedMemoryAllocator->allocate(totalIslands * sizeof(PxgIslandContext), alignment));

	// Batch headers: the articulation headers start right after the rigid constraint/contact headers.
	mConstraintBatchHeaders = reinterpret_cast<PxgConstraintBatchHeader*>(mPinnedMemoryAllocator->allocate(allocationSizeConstraintBatchHeader, alignment));
	mArticConstraintBatchHeaders = mConstraintBatchHeaders + mNumConstraintBatches;

	// Unique indices: one allocation, carved into consecutive sub-ranges (joints first, then contacts).
	mConstraintUniqueIndices = reinterpret_cast<PxU32*>(mPinnedMemoryAllocator->allocate(allocationSizeUniqueIndices, alignment));
	mRigidStaticConstraintUniqueIndices = mConstraintUniqueIndices + totalJoints;
	mArtiConstraintUniqueIndices = mRigidStaticConstraintUniqueIndices + totalStaticRigidJoints;
	mArtiStaticConstraintUniqueIndices = mArtiConstraintUniqueIndices + totalArticulationJoints;
	mArtiSelfConstraintUniqueIndices = mArtiStaticConstraintUniqueIndices + totalStaticArticulationJoints;
	mContactUniqueIndices = mArtiSelfConstraintUniqueIndices + totalSelfArticulationJoints;
	mRigidStaticContactUniqueIndices = mContactUniqueIndices + totalContacts;
	mArtiContactUniqueIndices = mRigidStaticContactUniqueIndices + totalStaticRigidContacts;
	mArtiStaticContactUniqueIndices = mArtiContactUniqueIndices + totalArticulationContacts;
	mArtiSelfContactUniqueIndices = mArtiStaticContactUniqueIndices + totalStaticArticulationContacts;

	// Per-articulation tables: four consecutive arrays of mArticulationCount entries each.
	mArtiStaticConstraintStartIndex = reinterpret_cast<PxU32*>(mPinnedMemoryAllocator->allocate(allocationSizeArticulationCount, alignment));
	mArtiStaticConstraintCount = mArtiStaticConstraintStartIndex + mArticulationCount;
	mArtiStaticContactStartIndex = mArtiStaticConstraintCount + mArticulationCount;
	mArtiStaticContactCount = mArtiStaticContactStartIndex + mArticulationCount;

	// Per-body tables: two consecutive arrays of mBodyCount entries each.
	mRigidStaticConstraintStartIndex = reinterpret_cast<PxU32*>(mPinnedMemoryAllocator->allocate(allocationSizeBodyCount, alignment));
	mRigidStaticConstraintCount = mRigidStaticConstraintStartIndex + mBodyCount;

	mGpuSolverCore->allocatePinnedDescriptors(*mPinnedMemoryAllocator);
}
// PT: TODO: un-indent all of the above
void PxgGpuContext::doConstraintPrepGPU()
{
PX_PROFILE_ZONE("GpuDynamics.ConstraintPrep", 0);
/**
* Things to do in here:
* (1) constraint prep on GPU
*/
mGpuSolverCore->resetVelocities(mIsTGS);
mGpuSolverCore->nonRigidConstraintPrepare(mArticulationCount);
mGpuSolverCore->jointConstraintPrepareParallel(PxU32(mNum1dConstraintBatches + mNumStaticRigid1dConstraintBatches));
mGpuSolverCore->contactConstraintPrepareParallel(PxU32(mNumContactBatches + mNumStaticRigidContactBatches));
mGpuSolverCore->artiJointConstraintPrepare(PxU32(mNumArti1dConstraintBatches + mNumStaticArti1dConstraintBatches + mNumSelfArti1dConstraintBatches));
mGpuSolverCore->artiContactConstraintPrepare(PxU32(mNumArtiContactBatches + mNumStaticArtiContactBatches + mNumSelfArtiContactBatches));
mGpuArticulationCore->precomputeDependencies(PxMin(mIncrementalPartition.getNbPartitions(), mIncrementalPartition.getCombinedSlabMaxNbPartitions()));
}
void PxgGpuContext::doPreIntegrationGPU()
{
const PxU32 offset = 1 + mKinematicCount;
mGpuSolverCore->preIntegration(offset, mSolverBodyPool.size(), mDt, mGravity);
if(mIsTGS)
mIslandContextPool->mBiasCoefficient = PxMin(0.9f, 2.0f * PxSqrt(1.0f / mIslandContextPool->mNumPositionIterations));
}
void PxgGpuContext::doArticulationGPU()
{
if(mIsTGS)
{
mGpuArticulationCore->computeUnconstrainedVelocities(mArticulationStartIndex, mArticulationCount, mDt, mGravity, 1.0f/mLengthScale, mIsExternalForcesEveryTgsIterationEnabled, mRecomputeArticulationBlockFormat);
}
else
{
mGpuArticulationCore->computeUnconstrainedVelocities(mArticulationStartIndex, mArticulationCount, mDt, mGravity, 1.0f/mLengthScale, false, mRecomputeArticulationBlockFormat);
mGpuArticulationCore->setupInternalConstraints(mArticulationCount, mDt, mDt, 1.0f / mDt, false);
}
}
void PxgGpuContext::doSoftbodyGPU()
{
PxgSoftBodyCore* softBodyCore = static_cast<PxgSimulationController*>(mSimulationController)->getSoftBodyCore();
if(softBodyCore)
softBodyCore->updateTetraRotations();
}
void PxgGpuContext::doFEMClothGPU()
{
// "I quickly checked, and it currently only resets Lagrange multiplier lambda used in the PBD framework.
// For TGS, we don't use the Lagrange multiplier so no need to reset. Calling it on PGS only sounds okay to me."
if(!mIsTGS)
{
PxgFEMClothCore* femClothCore = static_cast<PxgSimulationController*>(mSimulationController)->getFEMClothCore();
if(femClothCore)
femClothCore->preIteration();
}
}
void PxgGpuContext::doConstraintPrePrepGPU()
{
if(mIsTGS)
{
//Kick off articulation internal constraint setup code. At this point, we know the iteration count so we
//know how large time-steps will be.
const PxReal stepDt = mDt / PxReal(mIslandContextPool->mNumPositionIterations);
mGpuArticulationCore->setupInternalConstraints(mArticulationCount, stepDt, mDt, 1.0f / stepDt, true);
}
doConstraintPrePrepGPUCommon(mHasForceThresholds);
}
// Task body: forwards to the owning context's doPostSolveTask(), passing mCont.
void PxgPostSolveTask::runInternal()
{
mContext.doPostSolveTask(mCont);
}
//This class kicks off constraint solve on GPU
// The three stages (joint block pre-prep, constraint prep, constraint solve) are issued in that order, with the
// solver core's context acquired beforehand and released afterwards.
void PxgGpuTask::runInternal()
{
mContext.mGpuSolverCore->acquireContext();
mContext.doConstraintJointBlockPrePrepGPU();
mContext.doConstraintPrepGPU();
mContext.doConstraintSolveGPU(mMaxNodes, *mChangedHandleMap);
mContext.mGpuSolverCore->releaseContext();
}
// Task body: runs the articulation, soft body and FEM cloth per-step GPU work in that order, with the solver
// core's context acquired beforehand and released afterwards.
void PxgGpuIntegrationTask::runInternal()
{
mContext.mGpuSolverCore->acquireContext();
//for articulation
mContext.doArticulationGPU();
//for soft body update rotation
mContext.doSoftbodyGPU();
//for FEM-cloth
mContext.doFEMClothGPU();
mContext.mGpuSolverCore->releaseContext();
}
// Task body: with the solver core's context held, runs GPU pre-integration and constraint pre-prep and lets the
// joint manager reserve memory for Dy::MAX_CONSTRAINT_ROWS. After the context is released, the CPU joint
// pre-prep tasks are spawned with mCont as their continuation.
void PxgGpuPrePrepTask::runInternal()
{
	mContext.mGpuSolverCore->acquireContext();

	mContext.doPreIntegrationGPU();

	//for d6 joint
	mContext.doConstraintPrePrepGPU();

	mContext.getSimulationController()->getJointManager().reserveMemory(Dy::MAX_CONSTRAINT_ROWS);

	mContext.mGpuSolverCore->releaseContext();

	// CPU joint pre-prep is kicked off only after the context has been released.
	mContext.cpuJointPrePrepTask(mCont);
}
// Schedules the post-solve task: sets 'continuation' as its continuation and releases this function's reference
// to it.
void PxgGpuContext::updateBodyCore(PxBaseTask* continuation)
{
mPostSolveTask.setContinuation(continuation);
mPostSolveTask.removeReference();
}
//#define PXG_INCREMENTAL_SANITY_CHECKS
#if PX_ENABLE_ASSERTS
#ifdef PXG_INCREMENTAL_SANITY_CHECKS
// Sanity check used from assertions: returns true when no two of the first 'size' entries of 'buffer' compare
// equal. Quadratic pairwise comparison.
template <typename T>
static bool noDuplicates(T* buffer, const PxU32 size)
{
	// Compare every entry against all entries that precede it.
	for (PxU32 later = 1; later < size; ++later)
	{
		PxU32 earlier = 0;
		while (earlier < later)
		{
			if (buffer[later] == buffer[earlier])
				return false;
			++earlier;
		}
	}
	return true;
}
#else
// Sanity checks disabled: always reports "no duplicates" without inspecting the buffer.
template <typename T>
static bool noDuplicates(T*, const PxU32)
{
	return true;
}
#endif
#endif
// Returns true when there is anything to solve this step: at least one active rigid body, articulation,
// particle system, deformable surface or deformable volume.
static PX_FORCE_INLINE bool needsSolve(IG::IslandSim& islandSim, PxU32 bodyCount, PxU32 articulationCount)
{
	const PxU32 nbParticleSystems = islandSim.getNbActiveNodes(IG::Node::ePARTICLESYSTEM_TYPE);
	const PxU32 nbDeformableSurfaces = islandSim.getNbActiveNodes(IG::Node::eDEFORMABLE_SURFACE_TYPE);
	const PxU32 nbDeformableVolumes = islandSim.getNbActiveNodes(IG::Node::eDEFORMABLE_VOLUME_TYPE);

	// All counts are unsigned, so the OR of them is non-zero exactly when any single count is non-zero.
	const PxU32 anyActive = bodyCount | articulationCount | nbParticleSystems | nbDeformableVolumes | nbDeformableSurfaces;
	return anyActive != 0;
}
void PxgGpuContext::update( Cm::FlushPool& flushPool, PxBaseTask* continuation, PxBaseTask* postPartitioningTask, PxBaseTask* /*lostTouchTask*/,
PxvNphaseImplementationContext* nphase, PxU32 /*maxPatchesPerCM*/, PxU32 /*maxArticulationLinks*/, PxReal dt,
const PxVec3& gravity, PxBitMapPinned& /*changedHandleMap*/)
{
mGpuSolverCore->acquireContext();
PxsContactManagerOutputIterator iterator = nphase->getContactManagerOutputs();
PxsContactManagerOutput* gpuContactManagerOutputs = nphase->getGPUContactManagerOutputBase();
mGPURestDistances = nphase->getGPURestDistances();
mGPUShapeInteractions = nphase->getGPUShapeInteractions();
mGPUTorsionalData = nphase->getGPUTorsionalData();
mSolvedThisFrame = false;
mOutputIterator = iterator;
PX_ASSERT(noDuplicates(nphase->getLostFoundPatchManagers(), nphase->getNbLostFoundPatchManagers()));
//First and foremost, we need to get a set of islands (bodies, constraints etc.)
//These will be parameters
IG::IslandSim& islandSim = mIslandManager.getAccurateIslandSim();
const PxU32 bodyCount = islandSim.getNbActiveNodes(IG::Node::eRIGID_BODY_TYPE);
const PxU32 articulationCount = islandSim.getNbActiveNodes(IG::Node::eARTICULATION_TYPE);
mGpuSolverCore->setGpuContactManagerOutputBase(gpuContactManagerOutputs);
if(!mIsTGS)
mGpuSolverCore->syncSimulationController(); // PT: for some reason it's located here in PGS
const PxU32 kinematicCount = islandSim.getNbActiveKinematics();
mKinematicCount = kinematicCount;
mArticulationCount = articulationCount;
mArticulationStartIndex = 1 + kinematicCount + bodyCount;
mRecomputeArticulationBlockFormat = getSimulationController()->getRecomputeArticulationBlockFormat();
mBodyCount = bodyCount;
mPinnedMemoryAllocator->reset();
#if PX_ENABLE_SIM_STATS
mSimStats.mNbActiveKinematicBodies = islandSim.getNbActiveKinematics();
mSimStats.mNbActiveDynamicBodies = islandSim.getNbActiveNodes(IG::Node::eRIGID_BODY_TYPE);
mSimStats.mNbActiveConstraints = islandSim.getNbActiveEdges(IG::Edge::eCONSTRAINT);
mSimStats.mNbPartitions = mIncrementalPartition.getNbPartitions();
#else
PX_CATCH_UNDEFINED_ENABLE_SIM_STATS
#endif
//mConstraintWriteBackStreamAllocator->reserve(sizeof(Dy::ConstraintWriteback) * nbConstraints);
mConstraintsPerPartition.forceSize_Unsafe(0);
mDt = dt;
mInvDt = 1.f / dt;
mGravity = gravity;
//mEnableStabilization = enableStabilization;
if(mIsTGS)
mGpuSolverCore->syncSimulationController();
{
PX_PROFILE_ZONE("Dynamics.allocateBodyBuffers", 0);
const PxU32 maxLinks = getSimulationController()->getSimulationCore()->getMaxArticulationLinks();
const PxU32 maxDofs = getSimulationController()->getSimulationCore()->getMaxArticulationDofs();
const PxU32 totalLinkJointRootStateByteSize =
PxgArticulationLinkJointRootStateData::computeStateDataBufferByteSizeAligned16(maxLinks, maxDofs, articulationCount);
if (totalLinkJointRootStateByteSize > mLinkAndJointAndRootStateDataPool.capacity())
{
mLinkAndJointAndRootStateDataPool.forceSize_Unsafe(0);
mLinkAndJointAndRootStateDataPool.reserve(totalLinkJointRootStateByteSize);
}
if (articulationCount > mArticulationSleepDataPool.capacity())
{
mArticulationSleepDataPool.forceSize_Unsafe(0);
mArticulationSleepDataPool.reserve(articulationCount);
}
if (articulationCount*2 > mInternalResidualPerArticulationVelIter.capacity())
{
mInternalResidualPerArticulationVelIter.forceSize_Unsafe(0);
mInternalResidualPerArticulationVelIter.reserve(articulationCount*2);
}
if (articulationCount*2 > mInternalResidualPerArticulationPosIter.capacity())
{
mInternalResidualPerArticulationPosIter.forceSize_Unsafe(0);
mInternalResidualPerArticulationPosIter.reserve(articulationCount*2);
}
mLinkAndJointAndRootStateDataPool.forceSize_Unsafe(totalLinkJointRootStateByteSize);
mArticulationSleepDataPool.forceSize_Unsafe(articulationCount);
mInternalResidualPerArticulationVelIter.forceSize_Unsafe(articulationCount * 2);
mInternalResidualPerArticulationPosIter.forceSize_Unsafe(articulationCount * 2);
//1: Allocate buffers for all bodies (kinematic + dynamic)
if ((kinematicCount + bodyCount + 1) > mSolverBodyPool.capacity())
{
//we don't need to dma up/back dynamic solver body data to gpu anymore. However, we still need to dma up static/kinematic solver body
const PxU32 totalBodyAlignedCounts = (kinematicCount + bodyCount + 31 + 1) & (~31);
mSolverBodyPool.forceSize_Unsafe(0);
mSolverBodyPool.reserve(totalBodyAlignedCounts);
mBody2WorldPool.forceSize_Unsafe(0);
mBody2WorldPool.reserve(totalBodyAlignedCounts);
mSolverBodyDataPool.forceSize_Unsafe(0);
mSolverBodySleepDataPool.forceSize_Unsafe(0);
mSolverBodySleepDataPool.reserve(totalBodyAlignedCounts);
mSolverTxIDataPool.forceSize_Unsafe(0);
mSolverTxIDataPool.reserve(totalBodyAlignedCounts);
}
if ((kinematicCount + bodyCount + 1 + articulationCount) > mActiveNodeIndex.capacity())
{
const PxU32 totalArticulationAlignedCounts = (kinematicCount + bodyCount + 1 + articulationCount + 31) & (~31);
mActiveNodeIndex.forceSize_Unsafe(0);
mActiveNodeIndex.reserve(totalArticulationAlignedCounts);
}
if ((kinematicCount + 31 + 1) > mSolverBodyDataPool.capacity())
{
mSolverBodyDataPool.reserve((kinematicCount + 31 + 1) & (~31));
}
mActiveNodeIndex.forceSize_Unsafe(1 + kinematicCount + bodyCount + articulationCount);
//Set up constraint batches
const PxU32 totalBodySize = 1 + kinematicCount + bodyCount;
mSolverBodyPool.forceSize_Unsafe(totalBodySize);
mBody2WorldPool.forceSize_Unsafe(totalBodySize);
//we don't need to create dynamic solver body data in cpu anymore
mSolverBodyDataPool.forceSize_Unsafe(1 + kinematicCount);
//we need to dma up static+kinematic part of the sleepData and we dma up the whole sleepData array
mSolverBodySleepDataPool.forceSize_Unsafe(totalBodySize);
mSolverTxIDataPool.forceSize_Unsafe(totalBodySize);
}
if (getEnableDirectGPUAPI())
{
getSimulationController()->getJointManager().reserveMemoryPreAddRemove();
}
if (needsSolve(islandSim, bodyCount, articulationCount))
{
//Set up gpu workloads early!!!
const PxNodeIndex* const PX_RESTRICT nodeIndices = islandSim.getActiveNodes(IG::Node::eRIGID_BODY_TYPE);
const PxNodeIndex* const PX_RESTRICT articulationNodeIndices = islandSim.getActiveNodes(IG::Node::eARTICULATION_TYPE);
PxMemCopy(mActiveNodeIndex.begin() + 1, islandSim.getActiveKinematics(), islandSim.getNbActiveKinematics() * sizeof(PxNodeIndex));
PxMemCopy(mActiveNodeIndex.begin() + 1 + kinematicCount, nodeIndices, sizeof(PxNodeIndex) * mBodyCount);
PxMemCopy(mActiveNodeIndex.begin() + mArticulationStartIndex, articulationNodeIndices, sizeof(PxNodeIndex) * mArticulationCount);
mActiveNodeIndex[0] = PxNodeIndex();
PxgSimulationController* controller = static_cast<PxgSimulationController*>(mSimulationController);
const PxU32 maxLinks = controller->getMaxLinks();
//DMA up the body data right now and any other data that might be available
mGpuSolverCore->allocateSolverBodyBuffers(mIslandManager.getNbNodeHandles() + 1, mActiveNodeIndex, mArticulationCount, maxLinks);
mSolvedThisFrame = true;
//solver task chain!
//Note - *all* work for *all* islands is processed in phases using a wide-model approach.
//This is friendlier for the GPU but can be more wasteful in terms of memory
mGpuTask.setContinuation(continuation);
mGpuPrePrepTask.setContinuation(&mGpuTask);
mPrepTask.setContinuation(&mGpuPrePrepTask);
mPreIntegrationTask.setContinuation(&mPrepTask);
mGpuIntegrationTask.setContinuation(&mGpuPrePrepTask);
//Set up world rigid body
mSolverBodyPool[0] = mWorldSolverBody;
mSolverBodyDataPool[0] = mWorldSolverBodyData;
mSolverTxIDataPool[0] = mWorldTxIData;
mSolverBodySleepDataPool[0] = mWorldSolverBodySleepData;
// these two are being launched immediately.
mGpuIntegrationTask.removeReference();
mPreIntegrationTask.removeReference();
}
// PT: when updateIncrementalIslands() is single-threaded this is a blocking call and we can use the
// partitioning data when it returns. This is not the case anymore with multi-threaded implementations.
// doConstraintPrePrepCommon() consumes the output of the incremental island building as part of mPrepTask
mIncrementalPartition.updateIncrementalIslands(
mIslandManager.getAccurateIslandSim(),
mIslandManager.getAuxCpuData(),
&flushPool, postPartitioningTask,
mOutputIterator, // PT: don't pass the local variable, it will go out of scope while the partitioning tasks are using it
getSimulationController()->getBodySimManager(),
getSimulationController()->getJointManager());
// PT: all the code after the updateIncrementalIslands() call has been moved to PxgGpuContext::updatePostPartitioning() where
// it can safely be executed after the potential updateIncrementalIslands() tasks are completed.
mGpuSolverCore->releaseContext();
}
// Solver setup that consumes the output of mIncrementalPartition: fills in the single island
// context, sizes the contact/constraint bookkeeping, stages the partition and island data and
// hands it to the GPU solver core, then removes one reference from each of mGpuTask,
// mGpuPrePrepTask and mPrepTask.
//
// lostTouchTask    - gets one extra reference added and is stored in mLostTouchTask
//                    (only when the function does not early-out).
// maxPatchesPerCM  - multiplied by the edge handle count to size the friction patch index stream.
// changedHandleMap - forwarded to mGpuTask.setMaxNodesAndWordCounts().
// The remaining parameters (nphase, maxArticulationLinks, dt, gravity) are unnamed and unused.
//
// The GPU solver core context is acquired on entry and released on both exit paths.
// NOTE: when needsSolve() is false the function returns early, before lostTouchTask is
// referenced and before any of the task references are removed.
void PxgGpuContext::updatePostPartitioning(PxBaseTask* lostTouchTask, PxvNphaseImplementationContext* /*nphase*/,
PxU32 maxPatchesPerCM, PxU32 /*maxArticulationLinks*/,
PxReal /*dt*/, const PxVec3& /*gravity*/, PxBitMapPinned& changedHandleMap)
{
mGpuSolverCore->acquireContext();
IG::IslandSim& islandSim = mIslandManager.getAccurateIslandSim();
// Read-only views on the partitioning results; they are passed to gpuMemDMAUpContactData() below.
const PxPinnedArray<PartitionIndexData>& partitionIndexDataIter = mIncrementalPartition.getPartitionIndexArray();
const PxPinnedArray<PartitionNodeData>& partitionNodeData = mIncrementalPartition.getPartitionNodeArray();
const PxPinnedArray<PxgSolverConstraintManagerConstants>& solverConstantData = mIncrementalPartition.getSolverConstants();
const PxInt32ArrayPinned& partitionStartBatchIndexIter = mIncrementalPartition.getStartSlabPerPartition();
const PxInt32ArrayPinned& partitionArticStartBatchIndexIter = mIncrementalPartition.getArticStartSlabPerPartition();
const PxInt32ArrayPinned& partitionJointBatchCountIter = mIncrementalPartition.getNbJointsPerPartition();
const PxInt32ArrayPinned& partitionArtiJointBatchCountIter = mIncrementalPartition.getNbArticJointsPerPartition();
const PxArray<PxU32>& npIndexArrayIter = mIncrementalPartition.getNpIndexArray();
// Member staging buffers that receive copies of the np index array and of the island data further down.
PxInt32ArrayPinned& npIndexArrayStagingBuffer = mNodeIndicesStagingBuffer;
PxInt32ArrayPinned& islandIds = mIslandIds;
PxInt32ArrayPinned& islandStaticTouchCounts = mIslandStaticTouchCounts;
const PxU32 nbConstraints = islandSim.getNbActiveEdges(IG::Edge::eCONSTRAINT);
// At this point we are ready to allocate the pinned memory for the solver.
allocateTempPinnedSolverMemoryCommon();
const PxU32 bodyCount = mBodyCount;
const PxU32 kinematicCount = mKinematicCount;
const PxU32 articulationCount = mArticulationCount;
//Force all bodies into a single island. The GPU partitioning provides better work balancing between blocks than just using multiple islands.
PxgIslandContext& context = mIslandContextPool[0];
// Dynamic bodies start after slot 0 and the kinematic bodies.
context.mBodyStartIndex = 1 + kinematicCount;
context.mBodyCount = bodyCount;
context.mArticulationCount = articulationCount;
context.mNumPositionIterations = context.mNumVelocityIterations = 0;
mNumIslandContextPool = 1;
//because updateIncrementalIslands add/remove joints based on activation
getSimulationController()->updateJointsAndSyncData();
//reset number of frozen/unfrozen shapes to be zero
mSimulationController->clear();
PxgJointManager& jointManager = getSimulationController()->getJointManager();
// Sanity check: the CPU and GPU joint counts reported by the joint manager must add up to the active constraint edges.
PX_ASSERT((jointManager.getCpuNbRigidConstraints() + jointManager.getCpuNbArtiConstraints() +
jointManager.getGpuNbActiveRigidConstraints() + jointManager.getGpuNbActiveArtiConstraints()) == nbConstraints);
// jointManager is only referenced inside the assert above; presumably PX_UNUSED silences the unused-variable warning when asserts are compiled out.
PX_UNUSED(jointManager);
const PxU32 nbPatches = mIncrementalPartition.getTotalContacts(); // PT: same as what mIncrementalPartition.updateIncrementalIslands() returned
#if PX_ENABLE_ASSERTS
// The last entry of each accumulated-count array is used as the total (0 when the array is empty).
PxU32 accumulatedConstraints = mIncrementalPartition.getAccumulatedConstraintCount().size() == 0 ? 0 : mIncrementalPartition.getAccumulatedConstraintCount()[mIncrementalPartition.getAccumulatedConstraintCount().size() - 1];
PxU32 accumulatedArtiConstraints = mIncrementalPartition.getAccumulatedArtiConstraintCount().size() == 0 ? 0 : mIncrementalPartition.getAccumulatedArtiConstraintCount()[mIncrementalPartition.getAccumulatedArtiConstraintCount().size() - 1];
// Contacts + joints + articulation contacts must match the accumulated totals plus the static/self joint counts tracked by the body sim manager.
PX_ASSERT((nbPatches + islandSim.getNbActiveEdges(IG::Edge::eCONSTRAINT) + mIncrementalPartition.getTotalArticulationContacts()) == (accumulatedConstraints + accumulatedArtiConstraints + getSimulationController()->getBodySimManager().mTotalStaticArticJoints +
getSimulationController()->getBodySimManager().mTotalSelfArticJoints + getSimulationController()->getBodySimManager().mTotalStaticRBJoints));
#endif
{
PX_PROFILE_ZONE("Dynamics.allocateConstraintBuffers", 0);
//set the number of constraint batches here, but do the actual memory allocation in doPartitionTask() and free the excess amount in doConstraintPrePrepCommon(), so that
//we can make sure mConstraintBatches is the last element allocated in the pinned memory allocator and can therefore shrink the excess memory safely
//mNumConstraintBatches = sentinel->constraints + sentinel->contactManagers;
PxgBodySimManager& bodyManager = getSimulationController()->getBodySimManager();
// Both totals include the static rigid-body contributions counted by the body sim manager.
mNumContactManagers = nbPatches + bodyManager.mTotalStaticRBContacts;
mNum1DConstraints = nbConstraints + bodyManager.mTotalStaticRBJoints;
// Empty both threshold streams and reserve capacity for PxNextPowerOfTwo(mNumContactManagers) entries.
mThresholdStream->forceSize_Unsafe(0);
mThresholdStream->reserve(PxNextPowerOfTwo(mNumContactManagers));
mForceChangedThresholdStream->forceSize_Unsafe(0);
mForceChangedThresholdStream->reserve(PxNextPowerOfTwo(mNumContactManagers));
//Set up constraint batches
//If there is no work to do then we can do nothing at all.
// AD: this only works because we have the same if when setting up the task chain.
// it's also in a somewhat weird place. We should analyze the dependencies, is all of the work we're doing up to here actually
// required to happen even if we early-out here?
if (!needsSolve(islandSim, bodyCount, articulationCount))
{
// Early-out: release the context acquired on entry; nothing below this point runs.
mGpuSolverCore->releaseContext();
return;
}
//printf("NbarticBatches = %i, NbRigidBatches = %i\n", mIncrementalPartition.mNbArtiContactBatches, mIncrementalPartition.mNbContactBatches);
}
PxU32 descCount = 0;
PxU32 currentDescIndex = 0;
mGpuSolverCore->resetMemoryAllocator();
// Remember last frame's edge total before overwriting it with the current edge handle count.
PxU32 totalEdges = mIslandManager.getNbEdgeHandles();
mTotalPreviousEdges = mTotalEdges;
mTotalEdges = totalEdges;
mGpuSolverCore->allocateFrictionPatchIndexStream(totalEdges * maxPatchesPerCM); //How many batches
mGpuSolverCore->allocateFrictionCounts(totalEdges);
// The single island context covers every descriptor: total constraints + total contacts, starting at 0.
currentDescIndex = mIncrementalPartition.getTotalConstraints() + mIncrementalPartition.getTotalContacts();
context.mDescCount = currentDescIndex;
context.mDescStartIndex = descCount;
// NOTE(review): descCount is not read again in this function after this increment.
descCount += currentDescIndex;
lostTouchTask->addReference();
mLostTouchTask = lostTouchTask;
// Resize each staging buffer: reset the size to 0, reserve the needed capacity, then force the final size.
// NOTE(review): presumably the reset to 0 avoids preserving old contents if reserve() reallocates - confirm against the array implementation.
npIndexArrayStagingBuffer.forceSize_Unsafe(0);
npIndexArrayStagingBuffer.reserve(npIndexArrayIter.size());
npIndexArrayStagingBuffer.forceSize_Unsafe(npIndexArrayIter.size());
islandIds.forceSize_Unsafe(0);
islandIds.reserve(islandSim.getNbNodes());
islandIds.forceSize_Unsafe(islandSim.getNbNodes());
islandStaticTouchCounts.forceSize_Unsafe(0);
islandStaticTouchCounts.reserve(islandSim.getNbIslands());
islandStaticTouchCounts.forceSize_Unsafe(islandSim.getNbIslands());
//npIndexArray might be changed in island gen while solver is running, so we need to double buffer it
PxMemCopy(npIndexArrayStagingBuffer.begin(), npIndexArrayIter.begin(), sizeof(PxU32) * npIndexArrayIter.size());
// Island ids (one per node) and static touch counts (one per island) are copied the same way.
PxMemCopy(islandIds.begin(), islandSim.getIslandIds(), sizeof(PxU32) * islandSim.getNbNodes());
PxMemCopy(islandStaticTouchCounts.begin(), islandSim.getIslandStaticTouchCount(), sizeof(PxU32) * islandSim.getNbIslands());
const PxInt32ArrayPinned& nodeInteractions = mIncrementalPartition.getNodeInteractionCountArray();
// Hand the contact/patch stream allocators, the partition data and the staged copies to the GPU solver core.
mGpuSolverCore->gpuMemDMAUpContactData(mContactStreamAllocators[mCurrentContactStream],
PxToU32(mContactStreamPool.mSharedDataIndex),
mContactStreamPool.mSharedDataIndexGPU,
mPatchStreamAllocators[mCurrentContactStream],
PxToU32(mPatchStreamPool.mSharedDataIndex),
mPatchStreamPool.mSharedDataIndexGPU,
mNumContactManagers,
partitionIndexDataIter.begin(), partitionNodeData.begin(), solverConstantData.begin(), solverConstantData.size(), partitionIndexDataIter.size(),
partitionStartBatchIndexIter.begin(), partitionArticStartBatchIndexIter.begin(), partitionJointBatchCountIter.begin(), partitionArtiJointBatchCountIter.begin(),
partitionStartBatchIndexIter.size(),
mIncrementalPartition.getDestroyedContactEdgeIndices().begin(), mIncrementalPartition.getDestroyedContactEdgeIndices().size(),
npIndexArrayStagingBuffer.begin(), npIndexArrayStagingBuffer.size(),
/*jointManager.mGpuJointData, jointManager.mGpuJointPrePrep, gpuJointSize,*/ mConstraintWriteBackPool.size(),
islandIds.begin(), nodeInteractions.begin(), islandIds.size(), islandStaticTouchCounts.begin(), islandStaticTouchCounts.size());
// Matches the acquireContext() at the top of the function (normal exit path).
mGpuSolverCore->releaseContext();
mGpuTask.setMaxNodesAndWordCounts(mIslandManager.getNbNodeHandles(), changedHandleMap);
//Now we have kicked off all the atom integration and pre-prep work, so we can permit the remaining phases of the solver to run...
//mPostSolveTask.removeReference();
mGpuTask.removeReference();
mGpuPrePrepTask.removeReference();
mPrepTask.removeReference();
}
}