2510 lines
109 KiB
C++
2510 lines
109 KiB
C++
// Redistribution and use in source and binary forms, with or without
|
|
// modification, are permitted provided that the following conditions
|
|
// are met:
|
|
// * Redistributions of source code must retain the above copyright
|
|
// notice, this list of conditions and the following disclaimer.
|
|
// * Redistributions in binary form must reproduce the above copyright
|
|
// notice, this list of conditions and the following disclaimer in the
|
|
// documentation and/or other materials provided with the distribution.
|
|
// * Neither the name of NVIDIA CORPORATION nor the names of its
|
|
// contributors may be used to endorse or promote products derived
|
|
// from this software without specific prior written permission.
|
|
//
|
|
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
|
|
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
|
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
|
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
|
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
|
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
|
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
|
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
//
|
|
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
|
|
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
|
|
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
|
|
|
|
#include "PxgContext.h"
|
|
#include "cudamanager/PxCudaContext.h"
|
|
#include "common/PxProfileZone.h"
|
|
#include "PxgIslandContext.h"
|
|
#include "PxgSolverCore.h"
|
|
#include "PxvSimStats.h"
|
|
#include "DyConstraintPrep.h"
|
|
#include "PxgArticulationCore.h"
|
|
#include "PxgSoftBodyCore.h"
|
|
#include "PxgFEMClothCore.h"
|
|
#include "DyDeformableSurface.h"
|
|
#include "DyDeformableVolume.h"
|
|
#include "PxgSimulationCore.h"
|
|
#include "PxgPBDParticleSystemCore.h"
|
|
#include "DyIslandManager.h"
|
|
#include "CmFlushPool.h"
|
|
|
|
// PT: TODO: this doesn't compile anymore these days
|
|
//#undef PXG_CONTACT_VALIDATION
|
|
//#define PXG_CONTACT_VALIDATION 1
|
|
|
|
namespace physx
|
|
{
|
|
#if PXG_CONTACT_VALIDATION
|
|
#pragma warning(push)
|
|
#pragma warning(disable:4100)
|
|
// Debug-only sanity check (compiled only when PXG_CONTACT_VALIDATION is set) over the
// contact manager outputs referenced by uniqueIds[startIndex .. endIndex).
// Each unique id is remapped through npIds to the index passed to outputIter.
// Every check is a PX_ASSERT; the function itself always returns true.
static bool validateContactPairs(PxU32 startIndex, PxU32 endIndex, PxU32* uniqueIds, PxU32* npIds, PxsContactManagerOutputIterator& outputIter,
	PxU8* basePatchPointer, PxU8* baseContactPointer)
{
	for (PxU32 pairIndex = startIndex; pairIndex < endIndex; ++pairIndex)
	{
		const PxU32 npIndex = npIds[uniqueIds[pairIndex]];
		PxsContactManagerOutput& cmOutput = outputIter.getContactManagerOutput(npIndex);

		PxContactPatch* patches = reinterpret_cast<PxContactPatch*>(cmOutput.contactPatches);
		PxContact* points = reinterpret_cast<PxContact*>(cmOutput.contactPoints);

		// The patch/contact data must lie within a fixed element distance of the stream bases.
		PX_ASSERT((patches - reinterpret_cast<PxContactPatch*>(basePatchPointer)) < 655360);
		PX_ASSERT((points - reinterpret_cast<PxContact*>(baseContactPointer)) < (3145728));

		PX_ASSERT(cmOutput.nbPatches != 0);

		// Per-patch checks; also sum up the contacts the patches claim to own.
		PxU32 contactsInPatches = 0;
		for (PxU32 p = 0; p < cmOutput.nbPatches; ++p)
		{
			PxContactPatch& currentPatch = patches[p];
			PX_ASSERT(currentPatch.startContactIndex < cmOutput.nbContacts);
			PX_ASSERT(currentPatch.normal.isNormalized());
			contactsInPatches += currentPatch.nbContacts;
		}

		// Per-contact checks: position and separation must be finite.
		for (PxU32 c = 0; c < cmOutput.nbContacts; ++c)
		{
			PX_ASSERT(points[c].contact.isFinite());
			PX_ASSERT(PxIsFinite(points[c].separation));
		}

		// The patches together must account for exactly the reported number of contacts.
		PX_ASSERT(contactsInPatches == cmOutput.nbContacts);
	}

	return true;
}
|
|
|
|
// Debug-only sanity check (compiled only when PXG_CONTACT_VALIDATION is set) over the
// constraint pre-prep entries referenced by uniqueIds[startIndex .. endIndex).
// For both nodes of each constraint, asserts that the node index is either PX_INVALID_NODE
// or below 16000, and that a valid node maps to a solver body index below 16000.
// Always returns true.
static bool validateConstraintPairs(PxU32 startIndex, PxU32 endIndex, PxU32* uniqueIds, PxU32* npIds, PxgConstraintPrePrep* constraintPrePrep, PxU32* solverBodyIndices)
{
	for (PxU32 pairIndex = startIndex; pairIndex < endIndex; ++pairIndex)
	{
		const PxU32 npIndex = npIds[uniqueIds[pairIndex]];
		PxgConstraintPrePrep& entry = constraintPrePrep[npIndex];

		const PxU32 nodeA = entry.mNodeIndexA.index();
		const PxU32 nodeB = entry.mNodeIndexB.index();

		// Node indices themselves
		PX_ASSERT(nodeA == PX_INVALID_NODE || nodeA < 16000);
		PX_ASSERT(nodeB == PX_INVALID_NODE || nodeB < 16000);

		// Solver body indices the nodes map to
		PX_ASSERT(nodeA == PX_INVALID_NODE || solverBodyIndices[nodeA] < 16000);
		PX_ASSERT(nodeB == PX_INVALID_NODE || solverBodyIndices[nodeB] < 16000);
	}
	return true;
}
|
|
|
|
#pragma warning(pop)
|
|
#endif
|
|
|
|
class PxgBatchArticulationStaticConstraintPrePrepTask : public Cm::Task
|
|
{
|
|
PX_NOCOPY(PxgBatchArticulationStaticConstraintPrePrepTask)
|
|
private:
|
|
|
|
PxU32* mStaticContactIndices;
|
|
PxU32* mStaticJointIndices;
|
|
PxU32* mStaticContactCounts;
|
|
PxU32* mStaticJointCounts;
|
|
PxU32* mSelfContactIndices;
|
|
PxU32* mSelfJointIndices;
|
|
PxU32* mSelfContactCounts;
|
|
PxU32* mSelfJointCounts;
|
|
const PxU32 mStartIndex;
|
|
const PxU32 mEndIndex;
|
|
PxNodeIndex* mNodeIndices;
|
|
PxgBodySimManager& mBodyManager;
|
|
const PxU32 mNbArticulations;
|
|
|
|
public:
|
|
|
|
static const PxU32 NbPerTask = 512;
|
|
|
|
PxgBatchArticulationStaticConstraintPrePrepTask(PxU64 context,
|
|
PxU32* staticContactIndices, PxU32* staticJointIndices, PxU32* staticContactCounts, PxU32* staticJointCounts,
|
|
PxU32* selfContactIndices, PxU32* selfJointIndices, PxU32* selfContactCounts, PxU32* selfJointCounts,
|
|
PxU32 startIndex, PxU32 endIndex, PxNodeIndex* nodeIndices, PxgBodySimManager& bodyManager,
|
|
PxU32 nbArticulations) :
|
|
Cm::Task(context),
|
|
mStaticContactIndices(staticContactIndices), mStaticJointIndices(staticJointIndices),
|
|
mStaticContactCounts(staticContactCounts), mStaticJointCounts(staticJointCounts),
|
|
mSelfContactIndices(selfContactIndices), mSelfJointIndices(selfJointIndices),
|
|
mSelfContactCounts(selfContactCounts), mSelfJointCounts(selfJointCounts),
|
|
mStartIndex(startIndex), mEndIndex(endIndex),
|
|
mNodeIndices(nodeIndices), mBodyManager(bodyManager), mNbArticulations(nbArticulations)
|
|
{
|
|
}
|
|
|
|
virtual const char* getName() const PX_OVERRIDE PX_FINAL
|
|
{
|
|
return "PxgBatchArticulationStaticConstraintPrePrepTask";
|
|
}
|
|
|
|
virtual void runInternal() PX_OVERRIDE PX_FINAL
|
|
{
|
|
const PxU32 stride = mNbArticulations;
|
|
|
|
//const PxU32 blockCount = (mNbArticulations + 31)/32;
|
|
|
|
for (PxU32 i = mStartIndex; i < mEndIndex; ++i)
|
|
{
|
|
const PxU32 nodeIndex = mNodeIndices[i].index();
|
|
|
|
PxgStaticConstraints& staticConstraints = mBodyManager.mStaticConstraints[nodeIndex];
|
|
const PxU32 staticContactCount = staticConstraints.mStaticContacts.size();
|
|
PxgStaticConstraint* uniqueIds = staticConstraints.mStaticContacts.begin();
|
|
|
|
mStaticContactCounts[i] = staticContactCount;
|
|
for (PxU32 a = 0, offset = i; a < staticContactCount; ++a, offset += stride)
|
|
{
|
|
mStaticContactIndices[offset] = uniqueIds[a].uniqueId;
|
|
}
|
|
|
|
const PxU32 staticJointCount = staticConstraints.mStaticJoints.size();
|
|
uniqueIds = staticConstraints.mStaticJoints.begin();
|
|
|
|
mStaticJointCounts[i] = staticJointCount;
|
|
for (PxU32 a = 0, offset = i; a < staticJointCount; ++a, offset += stride)
|
|
{
|
|
mStaticJointIndices[offset] = uniqueIds[a].uniqueId;
|
|
}
|
|
|
|
const PxU32 articIndex = mBodyManager.mNodeToRemapMap[nodeIndex];
|
|
|
|
PxgArticulationSelfConstraints& selfConstraints = mBodyManager.mArticulationSelfConstraints[articIndex];
|
|
|
|
const PxU32 selfContactCount = selfConstraints.mSelfContacts.size();
|
|
PxgSelfConstraint* selfIds = selfConstraints.mSelfContacts.begin();
|
|
|
|
mSelfContactCounts[i] = selfContactCount;
|
|
for (PxU32 a = 0, offset = i; a < selfContactCount; ++a, offset += stride)
|
|
{
|
|
mSelfContactIndices[offset] = selfIds[a].uniqueId;
|
|
}
|
|
|
|
const PxU32 selfJointCount = selfConstraints.mSelfJoints.size();
|
|
selfIds = selfConstraints.mSelfJoints.begin();
|
|
|
|
mSelfJointCounts[i] = selfJointCount;
|
|
for (PxU32 a = 0, offset = i; a < selfJointCount; ++a, offset += stride)
|
|
{
|
|
mSelfJointIndices[offset] = selfIds[a].uniqueId;
|
|
}
|
|
}
|
|
}
|
|
};
|
|
|
|
class PxgBatchRigidStaticConstraintPrePrepTask : public Cm::Task
|
|
{
|
|
PX_NOCOPY(PxgBatchRigidStaticConstraintPrePrepTask)
|
|
private:
|
|
|
|
PxU32* mStaticContactIndices;
|
|
PxU32* mStaticJointIndices;
|
|
PxU32* mStaticContactCounts;
|
|
PxU32* mStaticJointCounts;
|
|
const PxU32 mStartIndex;
|
|
const PxU32 mEndIndex;
|
|
PxNodeIndex* mNodeIndices;
|
|
PxgBodySimManager& mBodyManager;
|
|
const PxU32 mNbBodies;
|
|
|
|
public:
|
|
|
|
static const PxU32 NbPerTask = 256;
|
|
|
|
PxgBatchRigidStaticConstraintPrePrepTask(PxU64 context,
|
|
PxU32* staticContactIndices, PxU32* staticJointIndices, PxU32* staticContactCounts, PxU32* staticJointCounts,
|
|
PxU32 startIndex, PxU32 endIndex, PxNodeIndex* nodeIndices, PxgBodySimManager& bodyManager,
|
|
PxU32 nbBodies) :
|
|
Cm::Task(context),
|
|
mStaticContactIndices(staticContactIndices), mStaticJointIndices(staticJointIndices),
|
|
mStaticContactCounts(staticContactCounts), mStaticJointCounts(staticJointCounts),
|
|
mStartIndex(startIndex), mEndIndex(endIndex),
|
|
mNodeIndices(nodeIndices), mBodyManager(bodyManager), mNbBodies(nbBodies)
|
|
{
|
|
}
|
|
|
|
virtual const char* getName() const PX_OVERRIDE PX_FINAL
|
|
{
|
|
return "PxgBatchRigidStaticConstraintPrePrepTask";
|
|
}
|
|
|
|
virtual void runInternal() PX_OVERRIDE PX_FINAL
|
|
{
|
|
const PxU32 stride = mNbBodies;
|
|
|
|
for (PxU32 i = mStartIndex; i < mEndIndex; ++i)
|
|
{
|
|
const PxU32 nodeIndex = mNodeIndices[i].index();
|
|
|
|
PxgStaticConstraints& staticConstraints = mBodyManager.mStaticConstraints[nodeIndex];
|
|
const PxU32 staticContactCount = staticConstraints.mStaticContacts.size();
|
|
PxgStaticConstraint* uniqueIds = staticConstraints.mStaticContacts.begin();
|
|
|
|
mStaticContactCounts[i] = staticContactCount;
|
|
for (PxU32 a = 0, offset = i; a < staticContactCount; ++a, offset += stride)
|
|
{
|
|
mStaticContactIndices[offset] = uniqueIds[a].uniqueId;
|
|
}
|
|
|
|
const PxU32 staticJointCount = staticConstraints.mStaticJoints.size();
|
|
uniqueIds = staticConstraints.mStaticJoints.begin();
|
|
|
|
mStaticJointCounts[i] = staticJointCount;
|
|
for (PxU32 a = 0, offset = i; a < staticJointCount; ++a, offset += stride)
|
|
{
|
|
mStaticJointIndices[offset] = uniqueIds[a].uniqueId;
|
|
}
|
|
}
|
|
}
|
|
};
|
|
|
|
void PxgCpuConstraintPrePrepTask::runInternal()
|
|
{
|
|
PX_PROFILE_ZONE("GpuDynamics.PxgCpuJointPrePrepTask", 0);
|
|
PxU32 currentEdgeIndex = 0;
|
|
|
|
for (PxU32 a = 0; a < mNumBatches; ++a)
|
|
{
|
|
PxU32 descStride = PxMin(mNumEdges - currentEdgeIndex, PXG_BATCH_SIZE);
|
|
|
|
PxgConstraintBatchHeader& batchHeader = mBatchHeaders[a];
|
|
batchHeader.constraintType = PxgSolverConstraintDesc::eCONSTRAINT_1D;
|
|
batchHeader.mDescStride = PxU16(descStride);
|
|
batchHeader.mConstraintBatchIndex = mConstraintBlockStartIndex + a;
|
|
batchHeader.mStartPartitionIndex = mUniqueIdStartIndex + a * PXG_BATCH_SIZE;
|
|
batchHeader.mask = 0xFFFFFFFF; //Unused
|
|
|
|
#if PXG_CONTACT_VALIDATION
|
|
validateConstraintPairs(a, a + descStride, mEdgeIds + a, mNpIds, mConstraintPrePrep, mSolverBodyIndices);
|
|
#endif
|
|
|
|
currentEdgeIndex += descStride;
|
|
}
|
|
|
|
for (PxU32 a = 0; a < mNumEdges; ++a)
|
|
{
|
|
mPinnedEdgeIds[mUniqueIdStartIndex + a] = mEdgeIds[a + mStartEdgeIdx];
|
|
}
|
|
|
|
//PxMemCopy(mPinnedEdgeIds + mUniqueIdStartIndex, mEdgeIds, sizeof(PxU32) * mNumEdges);
|
|
}
|
|
|
|
void PxgCpuArtiConstraintPrePrepTask::runInternal()
|
|
{
|
|
PX_PROFILE_ZONE("GpuDynamics.PxgCpuArtiJointPrePrepTask", 0);
|
|
PxU32 currentEdgeIndex = 0;
|
|
for (PxU32 a = 0; a < mNumBatches; ++a)
|
|
{
|
|
PxgConstraintBatchHeader& batchHeader = mBatchHeaders[a];
|
|
PxU32 descStride = PxMin(mNumEdges - currentEdgeIndex, PXG_BATCH_SIZE);
|
|
|
|
batchHeader.constraintType = PxU16(mIsContact ? PxgSolverConstraintDesc::eARTICULATION_CONTACT : PxgSolverConstraintDesc::eARTICULATION_CONSTRAINT_1D);
|
|
batchHeader.mDescStride = PxU16(descStride);
|
|
batchHeader.mConstraintBatchIndex = mConstraintBlockStartIndex + a;
|
|
batchHeader.mStartPartitionIndex = mUniqueIdStartIndex + a * PXG_BATCH_SIZE;
|
|
batchHeader.mask = 0xFFFFFFFF; //Unused
|
|
|
|
#if PXG_CONTACT_VALIDATION
|
|
validateConstraintPairs(a, a + descStride, mEdgeIds + a, mNpIds, mConstraintPrePrep, mSolverBodyIndices);
|
|
#endif
|
|
currentEdgeIndex += descStride;
|
|
}
|
|
|
|
for (PxU32 a = 0; a < mNumEdges; ++a)
|
|
{
|
|
mPinnedEdgeIds[mUniqueIdStartIndex + a] = mEdgeIds[a + mStartEdgeIdx];
|
|
}
|
|
|
|
//PxMemCopy(mPinnedEdgeIds + mUniqueIdStartIndex, mEdgeIds, sizeof(PxU32) * mNumEdges);
|
|
}
|
|
|
|
void PxgCpuPrepTask::runInternal()
|
|
{
|
|
mContext.doConstraintPrePrepCommon(mCont);
|
|
}
|
|
|
|
// Constructs the GPU dynamics context.
// - Forwards the shared simulation settings to Dy::Context.
// - Constructs every pinned container with a PxVirtualAllocator wrapping
//   heapMemoryManager->mMappedMemoryAllocators.
// - Binds the internal tasks to this context.
// - Leaves all GPU core pointers NULL and stores maxNumStaticPartitions.
PxgGpuContext::PxgGpuContext(Cm::FlushPool& flushPool, IG::SimpleIslandManager& islandManager, PxU32 maxNumPartitions, PxU32 maxNumStaticPartitions,
	bool enableStabilization, bool useEnhancedDeterminism,
	PxReal maxBiasCoefficient, PxvSimStats& simStats, PxgHeapMemoryAllocatorManager* heapMemoryManager, PxReal lengthScale, bool enableDirectGPUAPI, PxU64 contextID, bool isResidualReportingEnabled, bool isTGS) :
	Dy::Context(islandManager, heapMemoryManager->mMappedMemoryAllocators, simStats, enableStabilization,
		useEnhancedDeterminism, maxBiasCoefficient, lengthScale, contextID, isResidualReportingEnabled),
	// Edge bookkeeping and frame state
	mTotalEdges(0),
	mTotalPreviousEdges(0),
	mFlushPool(flushPool),
	mSolvedThisFrame(false),
	mIncrementalPartition(PxVirtualAllocator(heapMemoryManager->mMappedMemoryAllocators), maxNumPartitions, contextID),
	// Body / articulation state pools
	mActiveNodeIndex(PxVirtualAllocator(heapMemoryManager->mMappedMemoryAllocators)),
	mSolverBodyPool(PxVirtualAllocator(heapMemoryManager->mMappedMemoryAllocators)),
	mBody2WorldPool(PxVirtualAllocator(heapMemoryManager->mMappedMemoryAllocators)),
	mLinkAndJointAndRootStateDataPool(PxVirtualAllocator(heapMemoryManager->mMappedMemoryAllocators)),
	mArticulationSleepDataPool(PxVirtualAllocator(heapMemoryManager->mMappedMemoryAllocators)),
	mInternalResidualPerArticulationVelIter(PxVirtualAllocator(heapMemoryManager->mMappedMemoryAllocators)),
	mInternalResidualPerArticulationPosIter(PxVirtualAllocator(heapMemoryManager->mMappedMemoryAllocators)),
	// Constraint batch indices and per-partition counts
	m1dConstraintBatchIndices(PxVirtualAllocator(heapMemoryManager->mMappedMemoryAllocators)),
	mContactConstraintBatchIndices(PxVirtualAllocator(heapMemoryManager->mMappedMemoryAllocators)),
	mArti1dConstraintBatchIndices(PxVirtualAllocator(heapMemoryManager->mMappedMemoryAllocators)),
	mArtiContactConstraintBatchIndices(PxVirtualAllocator(heapMemoryManager->mMappedMemoryAllocators)),
	mConstraintsPerPartition(PxVirtualAllocator(heapMemoryManager->mMappedMemoryAllocators)),
	mArtiConstraintsPerPartition(PxVirtualAllocator(heapMemoryManager->mMappedMemoryAllocators)),
	// Solver body data pools
	mSolverBodyDataPool(PxVirtualAllocator(heapMemoryManager->mMappedMemoryAllocators)),
	mSolverBodySleepDataPool(PxVirtualAllocator(heapMemoryManager->mMappedMemoryAllocators)),
	mSolverTxIDataPool(PxVirtualAllocator(heapMemoryManager->mMappedMemoryAllocators)),
	mCachedPositionIterations(0),
	mCachedVelocityIterations(0),
	// Articulation static constraint buffers
	mArtiStaticContactCounts(PxVirtualAllocator(heapMemoryManager->mMappedMemoryAllocators)),
	mArtiStaticJointCounts(PxVirtualAllocator(heapMemoryManager->mMappedMemoryAllocators)),
	mArtiStaticContactIndices(PxVirtualAllocator(heapMemoryManager->mMappedMemoryAllocators)),
	mArtiStaticJointIndices(PxVirtualAllocator(heapMemoryManager->mMappedMemoryAllocators)),
	// Articulation self constraint buffers
	mArtiSelfContactCounts(PxVirtualAllocator(heapMemoryManager->mMappedMemoryAllocators)),
	mArtiSelfJointCounts(PxVirtualAllocator(heapMemoryManager->mMappedMemoryAllocators)),
	mArtiSelfContactIndices(PxVirtualAllocator(heapMemoryManager->mMappedMemoryAllocators)),
	mArtiSelfJointIndices(PxVirtualAllocator(heapMemoryManager->mMappedMemoryAllocators)),
	// Rigid body static constraint buffers
	mRigidStaticContactCounts(PxVirtualAllocator(heapMemoryManager->mMappedMemoryAllocators)),
	mRigidStaticJointCounts(PxVirtualAllocator(heapMemoryManager->mMappedMemoryAllocators)),
	mRigidStaticContactIndices(PxVirtualAllocator(heapMemoryManager->mMappedMemoryAllocators)),
	mRigidStaticJointIndices(PxVirtualAllocator(heapMemoryManager->mMappedMemoryAllocators)),
	// Staging and island buffers
	mNodeIndicesStagingBuffer(PxVirtualAllocator(heapMemoryManager->mMappedMemoryAllocators)),
	mIslandIds(PxVirtualAllocator(heapMemoryManager->mMappedMemoryAllocators)),
	mIslandStaticTouchCounts(PxVirtualAllocator(heapMemoryManager->mMappedMemoryAllocators)),
	// Configuration flags
	mIsTGS(isTGS),
	mIsExternalForcesEveryTgsIterationEnabled(false),
	mEnableDirectGPUAPI(enableDirectGPUAPI),
	mRecomputeArticulationBlockFormat(false),
	mEnforceConstraintWriteBackToHostCopy(false),
	// Internal tasks, each holding a reference to this context
	mPreIntegrationTask(*this),
	mPrepTask(*this),
	mGpuPrePrepTask(*this),
	mGpuIntegrationTask(*this),
	mGpuTask(*this),
	mPostSolveTask(*this)
{
	mMaxNumStaticPartitions = maxNumStaticPartitions;

	// None of the GPU cores is created here; all pointers start out NULL.
	mGpuBp = NULL;
	mGpuNpCore = NULL;
	mGpuSolverCore = NULL;
	mGpuSimulationCore = NULL;
	mGpuArticulationCore = NULL;
	mGpuSoftBodyCore = NULL;
	mGpuFEMClothCore = NULL;
	mGpuPBDParticleSystemCore = NULL;
}
|
|
|
|
// Tears down the context. The statement order is significant and kept as is:
// the stream allocators and the solver core's streams are released between
// acquireContext() and releaseContext() on the solver core; the threshold streams
// and the articulation/solver cores are deleted afterwards.
// NOTE(review): mGpuSolverCore is dereferenced unconditionally, so it is assumed to have
// been created by the time the destructor runs - confirm against the code that sets it.
PxgGpuContext::~PxgGpuContext()
{
	mGpuSolverCore->acquireContext();
	{
		PX_DELETE(mPinnedMemoryAllocator);

		// Double-buffered contact and patch stream allocators
		PX_DELETE(mContactStreamAllocators[0]);
		PX_DELETE(mContactStreamAllocators[1]);
		PX_DELETE(mPatchStreamAllocators[0]);
		PX_DELETE(mPatchStreamAllocators[1]);

		PX_DELETE(mForceStreamAllocator);
		PX_DELETE(mFrictionPatchStreamAllocator);

		mGpuSolverCore->releaseStreams();
	}
	mGpuSolverCore->releaseContext();

	PX_DELETE(mThresholdStream);
	PX_DELETE(mForceChangedThresholdStream);

	// The solver core is deleted last; it was still needed for the calls above.
	PX_DELETE(mGpuArticulationCore);
	PX_DELETE(mGpuSolverCore);
}
|
|
|
|
// Returns the stored simulation controller, downcast to the GPU implementation type.
PxgSimulationController* PxgGpuContext::getSimulationController()
{
	PxgSimulationController* const gpuController = static_cast<PxgSimulationController*>(mSimulationController);
	return gpuController;
}
|
|
|
|
// Stores the simulation controller pointer; no ownership handling happens here.
void PxgGpuContext::setSimulationController(PxsSimulationController* simulationController)
{
	this->mSimulationController = simulationController;
}
|
|
|
|
// Returns the PBD particle system core through the generic particle system core pointer type.
PxgParticleSystemCore* PxgGpuContext::getGpuParticleSystemCore()
{
	PxgParticleSystemCore* const particleCore = mGpuPBDParticleSystemCore;
	return particleCore;
}
|
|
|
|
void PxgGpuContext::mergeResults()
|
|
{
|
|
//Flip the current contact stream
|
|
mCurrentContactStream = 1 - mCurrentContactStream;
|
|
mContactStreamPool.mDataStream = mContactStreamAllocators[mCurrentContactStream]->mStart;
|
|
mPatchStreamPool.mDataStream = mPatchStreamAllocators[mCurrentContactStream]->mStart;
|
|
|
|
mContactStreamPool.mSharedDataIndex = 0;
|
|
mPatchStreamPool.mSharedDataIndex = 0;
|
|
mForceStreamPool.mSharedDataIndex = 0;
|
|
mFrictionPatchStreamPool.mSharedDataIndex = 0;
|
|
|
|
mContactStreamPool.mSharedDataIndexGPU = 0;
|
|
mPatchStreamPool.mSharedDataIndexGPU = 0;
|
|
mForceStreamPool.mSharedDataIndexGPU = 0;
|
|
mFrictionPatchStreamPool.mSharedDataIndexGPU = 0;
|
|
}
|
|
|
|
void PxgGpuContext::getDataStreamBase(void*& contactStreamBase, void*& patchStreamBase, void*& forceAndIndiceStreamBase)
|
|
{
|
|
return mGpuSolverCore->getDataStreamBase(contactStreamBase, patchStreamBase, forceAndIndiceStreamBase);
|
|
}
|
|
|
|
//this is the pre-prepare code for block format joints loaded from the non-block format joints
|
|
|
|
void PxgGpuContext::doConstraintJointBlockPrePrepGPU()
|
|
{
|
|
//DMA the joint pre-prepare data which constructs in CPU(not D6Joint) to GPU
|
|
|
|
// AD: This is not needed for direct-GPU API but downstream things are getting really complex and I cannot
|
|
// figure out which count I need to adjust to avoid crashing.
|
|
//if (!mEnableDirectGPUAPI)
|
|
{
|
|
PxgJointManager& jointManager = getSimulationController()->getJointManager();
|
|
|
|
if (jointManager.getCpuNbRigidConstraints() > 0)
|
|
{
|
|
mGpuSolverCore->gpuMemDMAUpJointData(jointManager.getCpuRigidConstraintData(), jointManager.getCpuRigidConstraintRows(), jointManager.getCpuRigidConstraintData().size(), jointManager.getGpuNbRigidConstraints(),
|
|
PxU32(jointManager.mNbCpuRigidConstraintRows));
|
|
}
|
|
|
|
if (jointManager.getCpuNbArtiConstraints() > 0)
|
|
{
|
|
mGpuSolverCore->gpuMemDMAUpArtiJointData(jointManager.getCpuArtiConstraintData(), jointManager.getCpuArtiConstraintRows(), jointManager.getCpuArtiConstraintData().size(), jointManager.getGpuNbArtiConstraints(),
|
|
PxU32(jointManager.mNbCpuArtiConstraintRows));
|
|
}
|
|
}
|
|
|
|
// maybe this is also not needed if we have direct-GPU?
|
|
mGpuSolverCore->jointConstraintBlockPrePrepParallel(mNumConstraintBatches + mNumRigidStaticConstraintBatches + mNumArticConstraintBatches + mNumArtiStaticConstraintBatches + mNumArtiSelfConstraintBatches);
|
|
}
|
|
|
|
// Sizes the articulation static/self constraint count and index buffers and spawns
// PxgBatchArticulationStaticConstraintPrePrepTask instances (NbPerTask articulations each)
// that fill them. The two batch indices passed in are recorded as the articulation static
// constraint/contact batch offsets. Each spawned task continues into 'continuation'.
void PxgGpuContext::doStaticArticulationConstraintPrePrep(physx::PxBaseTask* continuation, const PxU32 articulationConstraintBatchIndex, const PxU32 articulationContactBatchIndex)
{
	typedef PxgBatchArticulationStaticConstraintPrePrepTask PrePrepTask;

	PxgBodySimManager& bodyManager = getSimulationController()->getBodySimManager();

	// Articulation node indices follow the rigid bodies of the first island context.
	PxgIslandContext& island = mIslandContextPool[0];
	const PxU32 articulationStartIndex = island.mBodyStartIndex + island.mBodyCount;
	PxNodeIndex* nodeIndices = mActiveNodeIndex.begin() + articulationStartIndex;

	//KS - TODO - revisit this and make it work with batching. Currently, it is disabled!

	mArtiStaticConstraintBatchOffset = articulationConstraintBatchIndex;
	mArtiStaticContactBatchOffset = articulationContactBatchIndex;

	PX_PROFILE_ZONE("Articulation Static constraint", 0);

	// One count per articulation and category.
	mArtiStaticContactCounts.resize(mArticulationCount);
	mArtiStaticJointCounts.resize(mArticulationCount);
	mArtiSelfContactCounts.resize(mArticulationCount);
	mArtiSelfJointCounts.resize(mArticulationCount);

	// Index buffers hold (max entries per articulation) * (articulation count) elements.
	const PxU32 maxArtiStaticContacts = bodyManager.mMaxStaticArticContacts;
	const PxU32 maxArtiStaticJoints = bodyManager.mMaxStaticArticJoints;
	const PxU32 maxArtiSelfContacts = bodyManager.mMaxSelfArticContacts;
	const PxU32 maxArtiSelfJoints = bodyManager.mMaxSelfArticJoints;

	mArtiStaticContactIndices.resize(maxArtiStaticContacts * mArticulationCount);
	mArtiStaticJointIndices.resize(maxArtiStaticJoints * mArticulationCount);
	mArtiSelfContactIndices.resize(maxArtiSelfContacts * mArticulationCount);
	mArtiSelfJointIndices.resize(maxArtiSelfJoints * mArticulationCount);

	for (PxU32 first = 0; first < mArticulationCount; first += PrePrepTask::NbPerTask)
	{
		const PxU32 last = PxMin(first + PrePrepTask::NbPerTask, mArticulationCount);

		// Tasks are placement-constructed in memory obtained from the flush pool.
		void* taskMemory = mFlushPool.allocate(sizeof(PrePrepTask));
		PrePrepTask* task = PX_PLACEMENT_NEW(taskMemory, PrePrepTask)
			(0,
			mArtiStaticContactIndices.begin(), mArtiStaticJointIndices.begin(),
			mArtiStaticContactCounts.begin(), mArtiStaticJointCounts.begin(),
			mArtiSelfContactIndices.begin(), mArtiSelfJointIndices.begin(),
			mArtiSelfContactCounts.begin(), mArtiSelfJointCounts.begin(),
			first, last, nodeIndices, bodyManager, mArticulationCount);

		task->setContinuation(continuation);
		task->removeReference();
	}
}
|
|
|
|
// Sizes the rigid body static constraint count and index buffers and spawns
// PxgBatchRigidStaticConstraintPrePrepTask instances that fill them.
// Each spawned task covers a contiguous range of body slots and continues into 'continuation'.
void PxgGpuContext::doStaticRigidConstraintPrePrep(physx::PxBaseTask* continuation)
{
	PX_PROFILE_ZONE("Rigid Static constraint", 0);
	PxgBodySimManager& bodyManager = getSimulationController()->getBodySimManager();

	// Rigid body node indices start at the body start index of the first island context.
	PxgIslandContext& island = mIslandContextPool[0];
	const PxU32 bodyStartIndex = island.mBodyStartIndex;
	PxNodeIndex* nodeIndices = mActiveNodeIndex.begin() + bodyStartIndex;

	// One count per body and category.
	mRigidStaticContactCounts.resize(mBodyCount);
	mRigidStaticJointCounts.resize(mBodyCount);

	// Index buffers hold (max entries per body) * (body count) elements.
	const PxU32 maxRigidStaticContacts = bodyManager.mMaxStaticRBContacts;
	const PxU32 maxRigidStaticJoints = bodyManager.mMaxStaticRBJoints;

	mRigidStaticContactIndices.resize(maxRigidStaticContacts * mBodyCount);
	mRigidStaticJointIndices.resize(maxRigidStaticJoints * mBodyCount);

	// Partition with the rigid task's own batch size. The original used the articulation
	// task's NbPerTask here, leaving PxgBatchRigidStaticConstraintPrePrepTask::NbPerTask unused.
	// Only the number of bodies per task changes; the data written is the same.
	const PxU32 nbPerTask = PxgBatchRigidStaticConstraintPrePrepTask::NbPerTask;

	for (PxU32 i = 0; i < mBodyCount; i += nbPerTask)
	{
		const PxU32 endIndex = PxMin(i + nbPerTask, mBodyCount);

		// Tasks are placement-constructed in memory obtained from the flush pool.
		PxgBatchRigidStaticConstraintPrePrepTask* task = PX_PLACEMENT_NEW(mFlushPool.allocate(sizeof(PxgBatchRigidStaticConstraintPrePrepTask)), PxgBatchRigidStaticConstraintPrePrepTask)
			(0, mRigidStaticContactIndices.begin(), mRigidStaticJointIndices.begin(), mRigidStaticContactCounts.begin(), mRigidStaticJointCounts.begin(),
			i, endIndex, nodeIndices, bodyManager, mBodyCount);

		task->setContinuation(continuation);
		task->removeReference();
	}
}
|
|
|
|
// Issues the GPU solve for this step and the follow-up work, in this order:
// solve, optional force threshold accumulation, DMA of solver data back to the host,
// integration, articulation body update, simulation controller update and finally the
// DMA of articulation and solver body data back to the host.
// The order of the calls below is significant and must not be changed.
void PxgGpuContext::doConstraintSolveGPU(PxU32 maxNodes, PxBitMapPinned& changedHandleMap)
{
	/**
	* Things to do in here:
	* (1) Solve on GPU
	* (2) Write-back on GPU
	* (3) Integration on GPU (transforms are now on GPU solver body data so might as well use them)
	*/

	mGpuArticulationCore->syncStream();

	// One position-iteration residual entry per constraint write-back entry.
	mConstraintPositionIterResidualPoolGpu.resize(mConstraintWriteBackPool.size());

	mGpuSolverCore->solveContactMultiBlockParallel(
		mIslandContextPool, mNumIslandContextPool,
		mIncrementalPartition.getCombinedSlabMaxNbPartitions(),
		mConstraintsPerPartition, mArtiConstraintsPerPartition, mGravity,
		mConstraintPositionIterResidualPoolGpu.begin(), mConstraintPositionIterResidualPoolGpu.size(),
		&mTotalContactError.mPositionIterationErrorAccumulator,
		mArticulationContactErrorPosIter, mInternalResidualPerArticulationPosIter);
	mContactErrorPosIter = &mTotalContactError.mPositionIterationErrorAccumulator;

	if (mHasForceThresholds)
	{
		mGpuSolverCore->accumulatedForceThresholdStream(maxNodes + 1);
	}

	//KS - todo - use separate streams. In addition, read number of threshold streams before DMAing back data
	// True when the direct-GPU API is off, or when OVD collision readback is requested.
	const bool directGpuOffOrOvdCollisionReadback =
		!mEnableDirectGPUAPI || getSimulationController()->getEnableOVDCollisionReadback();

	mGpuSolverCore->gpuMemDMAbackSolverData(
		mForceStreamPool.mDataStream,
		mForceStreamPool.mDataStreamSize - mForceStreamPool.mSharedDataIndex,
		static_cast<PxU32>(mForceStreamPool.mSharedDataIndex),
		static_cast<PxU32>(mForceStreamPool.mSharedDataIndexGPU),
		mForceChangedThresholdStream->begin(), mIncrementalPartition.hasForceThresholds(),
		mConstraintWriteBackPool.begin(), mConstraintWriteBackPool.size(),
		directGpuOffOrOvdCollisionReadback, mContactErrorVelIter);

	// First index passed to integration: one past the kinematic count.
	const PxU32 offset = 1 + mKinematicCount;
	mGpuSolverCore->integrateCoreParallel(offset, mSolverBodyPool.size());

	mGpuArticulationCore->updateBodies(mDt, !mIsTGS, mEnableDirectGPUAPI);

	mSimulationController->update(changedHandleMap);

	if (isResidualReportingEnabled())
	{
		mArticulationContactErrorVelIter.resize(1);
	}

	// Articulation state is only copied back when the direct-GPU API is off or OVD readback is requested.
	const bool readBackArticulations = !mEnableDirectGPUAPI || getSimulationController()->getEnableOVDReadback();
	if (readBackArticulations)
	{
		mGpuArticulationCore->gpuMemDMAbackArticulation(mLinkAndJointAndRootStateDataPool, mArticulationSleepDataPool,
			mInternalResidualPerArticulationVelIter, mArticulationContactErrorVelIter);
	}

	// True when the direct-GPU API is on and OVD readback is not requested.
	const bool directGpuWithoutOvdReadback = mEnableDirectGPUAPI && (!getSimulationController()->getEnableOVDReadback());
	mGpuSolverCore->gpuMemDMAbackSolverBodies(reinterpret_cast<float4*>(mSolverBodyPool.begin()), mSolverBodyPool.size(), mBody2WorldPool,
		mSolverBodySleepDataPool, directGpuWithoutOvdReadback);
}
|
|
|
|
class PxgPostSolveWorkerTask : public Cm::Task
|
|
{
|
|
PxNodeIndex* mNodeIndices;
|
|
PxAlignedTransform* mBodyToWorldPool;
|
|
PxgSolverBodySleepData* mSolverBodySleepDataPool;
|
|
float4* mBodyVelocities;
|
|
PxU32 mNbBodies;
|
|
PxU32 mTotalBodies;
|
|
IG::IslandSim* mIslandSim;
|
|
|
|
public:
|
|
|
|
PxgPostSolveWorkerTask(PxNodeIndex* nodeIndices, PxAlignedTransform* bodyToWorldPool, PxgSolverBodySleepData* solverBodySleepDataPool, float4* bodyVelocities, PxU32 nbBodies, PxU32 totalBodies,
|
|
IG::IslandSim* islandSim) : Cm::Task(0),
|
|
mNodeIndices(nodeIndices), mBodyToWorldPool(bodyToWorldPool), mSolverBodySleepDataPool(solverBodySleepDataPool), mBodyVelocities(bodyVelocities), mNbBodies(nbBodies), mTotalBodies(totalBodies),
|
|
mIslandSim(islandSim)
|
|
{
|
|
}
|
|
|
|
virtual void runInternal() PX_OVERRIDE PX_FINAL
|
|
{
|
|
PX_PROFILE_ZONE("GpuDynamics.PxgPostSolveWorkerTask", 0);
|
|
|
|
// AD: skip this if we had GPU errors, will lead to asserts down below
|
|
// for signalling reasons we skip outside.
|
|
|
|
//copy data from PxgSolverBodyData to PxsBodyCore
|
|
for (PxU32 i = 0; i < mNbBodies; i++)
|
|
{
|
|
const PxU32 index = mNodeIndices[i].index();
|
|
//copy integration data
|
|
|
|
const PxgSolverBodySleepData& sleepData = mSolverBodySleepDataPool[i];
|
|
|
|
PxsRigidBody& originalBody = *getRigidBodyFromIG(*mIslandSim, PxNodeIndex(index));
|
|
|
|
PxsBodyCore& bodyCore = originalBody.getCore();
|
|
|
|
originalBody.mLastTransform = bodyCore.body2World;
|
|
const PxAlignedTransform& body2World = mBodyToWorldPool[i];
|
|
bodyCore.body2World = body2World.getTransform();
|
|
const float4& linVel = mBodyVelocities[i];
|
|
const float4& angVel = mBodyVelocities[i + mTotalBodies];
|
|
bodyCore.linearVelocity = PxVec3(linVel.x, linVel.y, linVel.z);
|
|
bodyCore.angularVelocity = PxVec3(angVel.x, angVel.y, angVel.z);
|
|
|
|
//copy sleep check data
|
|
bodyCore.solverWakeCounter = sleepData.wakeCounter;
|
|
originalBody.mInternalFlags = PxU8(sleepData.internalFlags);
|
|
|
|
PX_ASSERT(bodyCore.linearVelocity.isFinite());
|
|
PX_ASSERT(bodyCore.angularVelocity.isFinite());
|
|
}
|
|
}
|
|
|
|
virtual const char* getName() const PX_OVERRIDE PX_FINAL
|
|
{
|
|
return "PxgPostSolveWorkerTask";
|
|
}
|
|
|
|
private:
|
|
PX_NOCOPY(PxgPostSolveWorkerTask)
|
|
};
|
|
|
|
|
|
class PxgPostSolveArticulationTask : public Cm::Task
|
|
{
|
|
PxNodeIndex* mNodeIndices;
|
|
|
|
//see PxgArticulationLinkJointRootStateData
|
|
PxU8* mLinkAndJointAndRootStates;
|
|
Dy::ErrorAccumulator* mInternalResidualPerArticulationVelIter;
|
|
Dy::ErrorAccumulator* mInternalResidualPerArticulationPosIter;
|
|
|
|
PxgSolverBodySleepData* mSleepData;
|
|
PxU32 mNbArticulations;
|
|
PxU32 mArticulationStartIndex; //articulation offset in the nodeIndex
|
|
PxU32 mBatchStartIndex;
|
|
IG::SimpleIslandManager* mIslandManager;
|
|
PxU32 mMaxLinks;
|
|
PxU32 mMaxDofs;
|
|
PxReal mDt;
|
|
PxU32 mArticulationCount;
|
|
|
|
public:
|
|
|
|
PxgPostSolveArticulationTask(PxNodeIndex* nodeIndices, PxU8* linkAndJointAndRootStates, Dy::ErrorAccumulator* internalResidualPerArticulationPosIter,
|
|
Dy::ErrorAccumulator* internalResidualPerArticulationVelIter, PxgSolverBodySleepData* sleepData, PxU32 nbArticulation,
|
|
PxU32 articulationStartIndex,
|
|
IG::SimpleIslandManager* islandManager, const PxU32 batchStartIndex, const PxU32 maxLinks, const PxU32 maxDofs,
|
|
const PxReal dt, const PxU32 totalArticulationCount) :
|
|
Cm::Task(0), mNodeIndices(nodeIndices),
|
|
mLinkAndJointAndRootStates(linkAndJointAndRootStates),
|
|
mInternalResidualPerArticulationVelIter(internalResidualPerArticulationVelIter),
|
|
mInternalResidualPerArticulationPosIter(internalResidualPerArticulationPosIter),
|
|
mSleepData(sleepData),
|
|
mNbArticulations(nbArticulation),
|
|
mArticulationStartIndex(articulationStartIndex),
|
|
mBatchStartIndex(batchStartIndex),
|
|
mIslandManager(islandManager),
|
|
mMaxLinks(maxLinks), mMaxDofs(maxDofs),
|
|
mDt(dt), mArticulationCount(totalArticulationCount)
|
|
{
|
|
}
|
|
|
|
virtual void runInternal() PX_OVERRIDE PX_FINAL
|
|
{
|
|
PX_PROFILE_ZONE("GpuDynamics.PxgPostSolveArticulationTask", 0);
|
|
|
|
const PxU32 maxLinks = mMaxLinks;
|
|
const PxU32 maxDofs = mMaxDofs;
|
|
|
|
//copy data from PxgSolverBodyData to PxsBodyCore
|
|
const PxU32 endIndex = mBatchStartIndex + mNbArticulations;
|
|
|
|
IG::IslandSim& sim = mIslandManager->getAccurateIslandSim();
|
|
|
|
for (PxU32 a = mBatchStartIndex; a < endIndex; a++)
|
|
{
|
|
const PxU32 ind = a + mArticulationStartIndex;
|
|
|
|
PxNodeIndex nodeIndex = mNodeIndices[ind];
|
|
//const PxU32 nodeIndex = mNodeIndices[ind].index();
|
|
//copy integration data
|
|
|
|
Dy::FeatherstoneArticulation& articulation = *getArticulationFromIG(sim, nodeIndex);
|
|
Dy::ArticulationData& artiData = articulation.getArticulationData();
|
|
|
|
articulation.mInternalErrorAccumulatorPosIter = mInternalResidualPerArticulationPosIter[a];
|
|
articulation.mInternalErrorAccumulatorVelIter = mInternalResidualPerArticulationVelIter[a];
|
|
|
|
articulation.mContactErrorAccumulatorPosIter = mInternalResidualPerArticulationPosIter[a + mArticulationCount];
|
|
articulation.mContactErrorAccumulatorVelIter = mInternalResidualPerArticulationVelIter[a + mArticulationCount];
|
|
|
|
artiData.setDt(mDt);
|
|
|
|
const PxU32 numLinks = artiData.getLinkCount();
|
|
const PxU32 numDofs = artiData.getDofs();
|
|
|
|
//Get the address of the buffer holding the state data for the current articulation.
|
|
PxU8* singleArticulationStateBuffer =
|
|
PxgArticulationLinkJointRootStateData::getArticulationStateDataBuffer(
|
|
mLinkAndJointAndRootStates,
|
|
maxLinks, maxDofs, a);
|
|
|
|
//Decompose the buffer into its sub-arrays.
|
|
PxTransform* sBody2Worlds = NULL;
|
|
Cm::UnAlignedSpatialVector* sLinkVelocities = NULL;
|
|
Cm::UnAlignedSpatialVector* sLinkAccelerations = NULL;
|
|
Cm::UnAlignedSpatialVector* sLinkIncomingJointForces = NULL;
|
|
PxReal* sJointPositions = NULL;
|
|
PxReal* sJointVelocities = NULL;
|
|
PxReal* sJointAccels = NULL;
|
|
Cm::UnAlignedSpatialVector* sRootPreVel = NULL;
|
|
PxgArticulationLinkJointRootStateData::decomposeArticulationStateDataBuffer(
|
|
singleArticulationStateBuffer,
|
|
numLinks, numDofs,
|
|
sBody2Worlds, sLinkVelocities, sLinkAccelerations, sLinkIncomingJointForces,
|
|
sJointPositions, sJointVelocities, sJointAccels,
|
|
sRootPreVel);
|
|
|
|
Dy::ArticulationCore* core = articulation.getCore();
|
|
core->wakeCounter = mSleepData[a].wakeCounter;
|
|
|
|
if (mSleepData[a].internalFlags & PxsRigidBody::eACTIVATE_THIS_FRAME)
|
|
{
|
|
mIslandManager->getAccurateIslandSim().activateNode_ForGPUSolver(nodeIndex);
|
|
mIslandManager->getSpeculativeIslandSim().activateNode_ForGPUSolver(nodeIndex);
|
|
}
|
|
else if (mSleepData[a].internalFlags & PxsRigidBody::eDEACTIVATE_THIS_FRAME)
|
|
{
|
|
mIslandManager->getAccurateIslandSim().deactivateNode_ForGPUSolver(nodeIndex);
|
|
mIslandManager->getSpeculativeIslandSim().deactivateNode_ForGPUSolver(nodeIndex);
|
|
}
|
|
|
|
Dy::ArticulationLink* links = artiData.getLinks();
|
|
Cm::SpatialVectorF* linkVelocities = artiData.getMotionVelocities();
|
|
Cm::SpatialVectorF* linkAccelerations = artiData.getMotionAccelerations();
|
|
Cm::SpatialVectorF* linkIncomingJointForces = artiData.getLinkIncomingJointForces();
|
|
for (PxU32 i = 0; i < numLinks; ++i)
|
|
{
|
|
Dy::ArticulationLink& link = links[i];
|
|
PX_ASSERT(sBody2Worlds[i].isValid());
|
|
|
|
link.bodyCore->body2World = sBody2Worlds[i];
|
|
link.bodyCore->angularVelocity = sLinkVelocities[i].top;
|
|
link.bodyCore->linearVelocity = sLinkVelocities[i].bottom;
|
|
|
|
linkVelocities[i].top = sLinkVelocities[i].top;
|
|
linkVelocities[i].bottom = sLinkVelocities[i].bottom;
|
|
|
|
linkAccelerations[i].top = sLinkAccelerations[i].top;
|
|
linkAccelerations[i].bottom = sLinkAccelerations[i].bottom;
|
|
|
|
linkIncomingJointForces[i].top = sLinkIncomingJointForces[i].top;
|
|
linkIncomingJointForces[i].bottom = sLinkIncomingJointForces[i].bottom;
|
|
}
|
|
linkIncomingJointForces[0].top = PxVec3(PxZero);
|
|
linkIncomingJointForces[0].bottom = PxVec3(PxZero);
|
|
|
|
PxReal* jointPositions = artiData.getJointPositions();
|
|
PxReal* jointVelocities = artiData.getJointVelocities();
|
|
PxReal* jointAccelerations = artiData.getJointAccelerations();
|
|
for (PxU32 i = 0; i < numDofs; ++i)
|
|
{
|
|
jointPositions[i] = sJointPositions[i];
|
|
jointVelocities[i] = sJointVelocities[i];
|
|
jointAccelerations[i] = sJointAccels[i];
|
|
}
|
|
|
|
artiData.setRootPreMotionVelocity(*sRootPreVel);
|
|
}
|
|
}
|
|
|
|
virtual const char* getName() const PX_OVERRIDE PX_FINAL
|
|
{
|
|
return "PxgPostSolveArticulationTask";
|
|
}
|
|
|
|
private:
|
|
PX_NOCOPY(PxgPostSolveArticulationTask)
|
|
};
|
|
|
|
// Forwards the lost/found patch contact managers to the incremental partitioner, together
// with the accurate island sim and the simulation controller's body-sim and joint managers.
void PxgGpuContext::processPatches(Cm::FlushPool& flushPool, PxBaseTask* continuation,
	PxsContactManager** lostFoundPatchManagers, PxU32 nbLostFoundPatchManagers, PxsContactManagerOutputCounts* outCounts)
{
	IG::IslandSim& accurateIslandSim = mIslandManager.getAccurateIslandSim();
	PxgBodySimManager& bodySimManager = getSimulationController()->getBodySimManager();
	PxgJointManager& jointManager = getSimulationController()->getJointManager();

	mIncrementalPartition.processLostFoundPatches(
		flushPool, continuation,
		accurateIslandSim, bodySimManager, jointManager,
		lostFoundPatchManagers, nbLostFoundPatchManagers, outCounts);
}
|
|
|
|
// Post-solve step: integrates the particle systems, syncs the solver DMA back, and (unless
// the direct GPU API is enabled without OVD readback) spawns worker tasks that write rigid
// body and articulation results back to the CPU-side objects. Finally queries each particle
// core's finalize stream.
void PxgGpuContext::doPostSolveTask(physx::PxBaseTask* continuation)
{
	// Bail out if nothing was solved this frame or the CUDA context is in abort mode.
	// AD: sneaky, but apparently only narrowphasecore has that member public.
	if (!mSolvedThisFrame || getNarrowphaseCore()->mCudaContext->isInAbortMode())
		return;

	const PxU32 numParticleCores = mGpuParticleSystemCores.size();
	for (PxU32 coreIdx = 0; coreIdx < numParticleCores; ++coreIdx)
	{
		PxgParticleSystemCore* particleCore = mGpuParticleSystemCores[coreIdx];
		const PxReal eps = 0.f;// mLengthScale * 1e-4f;
		particleCore->integrateSystems(mDt, eps*eps);
		particleCore->onPostSolve(); // call the callback.
	}

	PxU32 nbThresholdElems = 0;
	mGpuSolverCore->syncDmaBack(nbThresholdElems);
	mForceChangedThresholdStream->forceSize_Unsafe(nbThresholdElems);

	const bool writeBackToCpu = !mEnableDirectGPUAPI || getSimulationController()->getEnableOVDReadback();
	if (writeBackToCpu)
	{
		//TODO - multi-thread this!

		PxNodeIndex* nodeIndices = mActiveNodeIndex.begin();

		//write back the data to PxsBodyCore
		{
			//Entries before this index (slot 0 plus the kinematics) are not written back.
			const PxU32 firstBody = 1 + mKinematicCount;
			const PxU32 totalNumBodies = mSolverBodyPool.size();
			const PxU32 bodiesPerTask = 512;

			float4* bodyVelocities = reinterpret_cast<float4*>(mSolverBodyPool.begin());
			PxAlignedTransform* body2Worlds = mBody2WorldPool.begin();
			IG::IslandSim* accurateIslandSim = &mIslandManager.getAccurateIslandSim();

			for (PxU32 first = firstBody; first < totalNumBodies; first += bodiesPerTask)
			{
				PxgSolverBodySleepData* sleepData = &mSolverBodySleepDataPool[first];
				const PxU32 count = PxMin(bodiesPerTask, totalNumBodies - first);

				void* taskMem = mFlushPool.allocate(sizeof(PxgPostSolveWorkerTask));
				PxgPostSolveWorkerTask* task = PX_PLACEMENT_NEW(taskMem, PxgPostSolveWorkerTask)(
					nodeIndices + first, body2Worlds + first, sleepData, bodyVelocities + first,
					count, totalNumBodies, accurateIslandSim);

				task->setContinuation(continuation);
				task->removeReference();
			}
		}

		//write back the articulation state
		{
			const PxU32 maxLinks = getSimulationController()->getSimulationCore()->getMaxArticulationLinks();
			const PxU32 maxDofs = getSimulationController()->getSimulationCore()->getMaxArticulationDofs();

			//At least 64 articulations per task, and at most 128 tasks in total.
			const PxU32 articulationsPerTask = PxMax(64u, (mArticulationCount + 127u) / 128u);

			for (PxU32 first = 0; first < mArticulationCount; first += articulationsPerTask)
			{
				const PxU32 count = PxMin(articulationsPerTask, mArticulationCount - first);

				void* taskMem = mFlushPool.allocate(sizeof(PxgPostSolveArticulationTask));
				PxgPostSolveArticulationTask* task = PX_PLACEMENT_NEW(taskMem, PxgPostSolveArticulationTask)(
					nodeIndices,
					mLinkAndJointAndRootStateDataPool.begin(),
					mInternalResidualPerArticulationPosIter.begin(),
					mInternalResidualPerArticulationVelIter.begin(),
					mArticulationSleepDataPool.begin(),
					count, mArticulationStartIndex, &mIslandManager, first,
					maxLinks, maxDofs, mDt, mArticulationCount);

				task->setContinuation(continuation);
				task->removeReference();
			}
		}
	}

	mGpuSolverCore->acquireContext();

	for (PxU32 coreIdx = 0; coreIdx < numParticleCores; ++coreIdx)
	{
		//Flush particle work; the query result is intentionally not inspected.
		cuStreamQuery(mGpuParticleSystemCores[coreIdx]->getFinalizeStream());
	}

	mGpuSolverCore->releaseContext();
}
|
|
|
|
// Fills the solver body data and TxI data of a static/kinematic body from its PxsBodyCore.
// The body is flagged eKINEMATIC, gets a zero sqrtInvInertia and an identity deltaBody2World.
static void copyToSolverBodyStaticAndKinematic(PxgSolverBodyData& data, PxgSolverTxIData& txIData, const PxsBodyCore& core, PxNodeIndex nodeIndex)
{
	// PT: the gravity-disable sleep flag (PxsRigidBody::eDISABLE_GRAVITY_GPU) is not needed for statics/kinematics.
	// The inverse inertia has been moved to pxgbodysim, and the CCD / surface-velocity bits are
	// no longer packed into the island node index, which is stored as-is.
	data.islandNodeIndex = nodeIndex;

	// Velocities
	data.initialLinVel = core.linearVelocity;
	data.initialAngVel = core.angularVelocity;
	PX_ASSERT(core.linearVelocity.isFinite());
	PX_ASSERT(core.angularVelocity.isFinite());

	// Pose
	const PxTransform& pose = core.body2World;
	data.body2World = PxAlignedTransform(pose.p.x, pose.p.y, pose.p.z,
		PxAlignedQuat(pose.q.x, pose.q.y, pose.q.z, pose.q.w));

	// Mass and contact properties
	data.invMass = core.inverseMass;
	data.penBiasClamp = core.maxPenBias;
	data.reportThreshold = core.contactReportThreshold;
	data.maxImpulse = core.maxContactImpulse;
	data.offsetSlop = 0.0f;
	//data.writeIndex = PxgSolverBody::InvalidHandle;

	data.flags = PxRigidBodyFlag::eKINEMATIC;

	// Inertia / delta pose
	txIData.sqrtInvInertia = PxMat33(PxZero);
	txIData.deltaBody2World = PxTransform(PxIdentity);
}
|
|
|
|
static void atomArticulationIntegration(const PxU32 numArticulations,
|
|
const PxNodeIndex* const PX_RESTRICT islandNodes,
|
|
IG::SimpleIslandManager& islandManager,
|
|
PxI32* maxPosIters, PxI32* maxVelIters)
|
|
{
|
|
PxU32 localMaxPosIter = 0, localMaxVelIter = 0;
|
|
for (PxU32 a = 0; a < numArticulations; ++a)
|
|
{
|
|
const PxNodeIndex nodeId = islandNodes[a];
|
|
//const PxU32 nodeIndex = nodeId.index();
|
|
|
|
Dy::FeatherstoneArticulation* artic = getArticulationFromIG(islandManager.getAccurateIslandSim(), nodeId);
|
|
|
|
const PxU16 iterCount = artic->getIterationCounts();
|
|
|
|
localMaxPosIter = PxMax<PxU32>(PxU32(iterCount & 0xff), localMaxPosIter);
|
|
localMaxVelIter = PxMax<PxU32>(PxU32(iterCount >> 8), localMaxVelIter);
|
|
}
|
|
|
|
PxAtomicMax(maxPosIters, (PxI32)localMaxPosIter);
|
|
PxAtomicMax(maxVelIters, (PxI32)localMaxVelIter);
|
|
}
|
|
|
|
class PxgSetupKinematicTask : public Cm::Task
|
|
{
|
|
const PxNodeIndex* const PX_RESTRICT mKinematicNodes;
|
|
PxNodeIndex* mActiveNodeIndex; //copy island node index into this list
|
|
const PxU32 mNumBodies;
|
|
IG::SimpleIslandManager& mIslandManager;
|
|
PxU32 mSolverBodyStartIndex;
|
|
|
|
PxgSolverBodyData* mSolverBodyDataPool;
|
|
PxgSolverBodySleepData* mSolverBodySleepDataPool;
|
|
PxgSolverTxIData* mSolverTxIData;
|
|
|
|
PX_NOCOPY(PxgSetupKinematicTask)
|
|
|
|
public:
|
|
|
|
PxgSetupKinematicTask(const PxNodeIndex* const PX_RESTRICT kinematicNodes, PxNodeIndex* activeNodeIndex, const PxU32 numBodies,
|
|
IG::SimpleIslandManager& islandManager, PxU32 solverBodyStartIndex, PxgSolverBodyData* solverBodyDataPool,
|
|
PxgSolverBodySleepData* solverBodySleepDataPool, PxgSolverTxIData* txIData) : Cm::Task(0), mKinematicNodes(kinematicNodes), mActiveNodeIndex(activeNodeIndex), mNumBodies(numBodies),
|
|
mIslandManager(islandManager), mSolverBodyStartIndex(solverBodyStartIndex), mSolverBodyDataPool(solverBodyDataPool),
|
|
mSolverBodySleepDataPool(solverBodySleepDataPool), mSolverTxIData(txIData)
|
|
{
|
|
}
|
|
|
|
virtual void runInternal() PX_OVERRIDE PX_FINAL
|
|
{
|
|
IG::IslandSim& islandSim = mIslandManager.getAccurateIslandSim();
|
|
|
|
//Set up solver bodies for any kinematic bodies
|
|
for (PxU32 i = 0; i < mNumBodies; i++)
|
|
{
|
|
PxsRigidBody& rigidBody = *getRigidBodyFromIG(islandSim, mKinematicNodes[i]);
|
|
const PxsBodyCore& core = rigidBody.getCore();
|
|
copyToSolverBodyStaticAndKinematic(mSolverBodyDataPool[i], mSolverTxIData[i], core, mKinematicNodes[i]);
|
|
//mActiveNodeIndex[mSolverBodyStartIndex + i] = mKinematicNodes[i];
|
|
rigidBody.saveLastCCDTransform();
|
|
}
|
|
}
|
|
|
|
virtual const char* getName() const PX_OVERRIDE PX_FINAL
|
|
{
|
|
return "PxgKinematicSetupTask";
|
|
}
|
|
};
|
|
|
|
class PxgAtomIntegrationTask : public Cm::Task
|
|
{
|
|
const PxNodeIndex* const PX_RESTRICT mIslandNodes;
|
|
const PxU32 mNumBodies;
|
|
PxI32* mMaxPosIters;
|
|
PxI32* mMaxVelIters;
|
|
IG::SimpleIslandManager& mIslandManager;
|
|
|
|
PX_NOCOPY(PxgAtomIntegrationTask)
|
|
|
|
public:
|
|
|
|
PxgAtomIntegrationTask(const PxNodeIndex* const PX_RESTRICT islandNodes, const PxU32 numBodies, PxI32* PX_RESTRICT maxPosIters, PxI32* PX_RESTRICT maxVelIters,
|
|
IG::SimpleIslandManager& islandManager) : Cm::Task(0),
|
|
mIslandNodes(islandNodes),
|
|
mNumBodies(numBodies), mMaxPosIters(maxPosIters), mMaxVelIters(maxVelIters),
|
|
mIslandManager(islandManager)
|
|
{
|
|
}
|
|
|
|
virtual void runInternal() PX_OVERRIDE PX_FINAL
|
|
{
|
|
PX_PROFILE_ZONE("GpuDynamics.PxgIntegrateTask", 0);
|
|
PxI32 localPosIters = 0; PxI32 localVelIters = 0;
|
|
IG::IslandSim& sim = mIslandManager.getAccurateIslandSim();
|
|
for (PxU32 i = 0; i < mNumBodies; ++i)
|
|
{
|
|
const PxNodeIndex nodeId = mIslandNodes[i];
|
|
//activeNodeIndex[startIndex] = nodeId;
|
|
PxsRigidBody& rigidBody = *getRigidBodyFromIG(sim, nodeId);
|
|
|
|
localPosIters = PxMax<PxI32>(PxI32(rigidBody.mSolverIterationCounts & 0xff), localPosIters);
|
|
localVelIters = PxMax<PxI32>(PxI32(rigidBody.mSolverIterationCounts >> 8), localVelIters);
|
|
}
|
|
|
|
PxAtomicMax(mMaxPosIters, localPosIters);
|
|
PxAtomicMax(mMaxVelIters, localVelIters);
|
|
}
|
|
|
|
virtual const char* getName() const PX_OVERRIDE PX_FINAL
|
|
{
|
|
return "PxgIntegrateTask";
|
|
}
|
|
};
|
|
|
|
class PxgArticulationAtomIntegrationTask : public Cm::Task
|
|
{
|
|
const PxNodeIndex* const PX_RESTRICT mIslandNodes;
|
|
|
|
const PxU32 mNumArticulations;
|
|
|
|
PxI32* mMaxPosIters;
|
|
PxI32* mMaxVelIters;
|
|
IG::SimpleIslandManager& mIslandManager;
|
|
|
|
PX_NOCOPY(PxgArticulationAtomIntegrationTask)
|
|
|
|
public:
|
|
|
|
PxgArticulationAtomIntegrationTask(
|
|
const PxNodeIndex* const PX_RESTRICT islandNodes,
|
|
const PxU32 numArticulations, PxI32* maxPosIters,
|
|
PxI32* maxVelIters,
|
|
IG::SimpleIslandManager& islandManager
|
|
) :
|
|
Cm::Task(0), mIslandNodes(islandNodes),
|
|
mNumArticulations(numArticulations),
|
|
mMaxPosIters(maxPosIters),
|
|
mMaxVelIters(maxVelIters),
|
|
mIslandManager(islandManager)
|
|
{
|
|
}
|
|
|
|
virtual void runInternal() PX_OVERRIDE PX_FINAL
|
|
{
|
|
PX_PROFILE_ZONE("GpuDynamics.PxgArticulationAtomIntegrationTask", 0);
|
|
atomArticulationIntegration(mNumArticulations, mIslandNodes,
|
|
mIslandManager, mMaxPosIters, mMaxVelIters);
|
|
}
|
|
|
|
virtual const char* getName() const PX_OVERRIDE PX_FINAL
|
|
{
|
|
return "PxgArticulationAtomIntegrationTask";
|
|
}
|
|
};
|
|
|
|
// Pre-integration step: resets the per-frame batch counters, (when the state is dirty)
// spawns tasks that recompute the cached max position/velocity iteration counts from rigid
// bodies and articulations, spawns the kinematic setup tasks, and finally merges the
// iteration counts of particle systems, soft bodies and FEM cloths into the cached maxima.
void PxgGpuContext::doPreIntegrationTaskCommon(physx::PxBaseTask* continuation)
{
	// AD: this task currently assumes we only have 1 solver island. If there is a variable amount of islands,
	// the dependency chain needs to be fixed, because this task runs in parallel to allocating and setting
	// the members of mIslandContextPool. (see Pxg(TGS)DynamicsContext::update()).

	mNumContactBatches = 0;
	mNum1dConstraintBatches = 0;
	mNumArtiContactBatches = 0;
	mNumArti1dConstraintBatches = 0;

	mArtiStaticConstraintBatchOffset = 0;
	mArtiStaticContactBatchOffset = 0;

	const IG::IslandSim& islandSim = mIslandManager.getAccurateIslandSim();

	//Rigid bodies per task: an even share per worker, clamped to [256, 1024].
	const PxU32 workerCount = PxMax(1u, continuation->getTaskManager()->getCpuDispatcher()->getWorkerCount());
	const PxU32 rigidBatchSize = PxMax(256u, PxMin(1024u, (mBodyCount + workerCount - 1) / workerCount));

	const PxNodeIndex* const PX_RESTRICT rigidNodes = islandSim.getActiveNodes(IG::Node::eRIGID_BODY_TYPE);

	mGpuSolverCore->acquireContext();

	const PxNodeIndex* const PX_RESTRICT articulationNodes = islandSim.getActiveNodes(IG::Node::eARTICULATION_TYPE);

	//Because we need to put the articulation active node index into the same list as mActiveNodeIndex, so we need to make sure
	//articulation active node index start in the right place. In the active node index list, we start with static + kinematic +
	//active rigid bodies + active articulations
	//const PxU32 articulationStartIndex = island.mBodyStartIndex + island.mBodyCount;

	if (isStateDirty())
	{
		mCachedPositionIterations = 0;
		mCachedVelocityIterations = 0;

		//Loop through and fill in properties from all the rigid bodies...
		for (PxU32 first = 0; first < mBodyCount; first += rigidBatchSize)
		{
			void* taskMem = mFlushPool.allocate(sizeof(PxgAtomIntegrationTask));
			PxgAtomIntegrationTask* task = PX_PLACEMENT_NEW(taskMem, PxgAtomIntegrationTask)(
				rigidNodes + first, PxMin(rigidBatchSize, mBodyCount - first),
				&mCachedPositionIterations, &mCachedVelocityIterations, mIslandManager);

			task->setContinuation(continuation);
			task->removeReference();
		}

		setStateDirty(false);

		//...and from all the articulations.
		const PxU32 articulationBatchSize = 1024u;
		for (PxU32 first = 0; first < mArticulationCount; first += articulationBatchSize)
		{
			void* taskMem = mFlushPool.allocate(sizeof(PxgArticulationAtomIntegrationTask));
			PxgArticulationAtomIntegrationTask* task = PX_PLACEMENT_NEW(taskMem, PxgArticulationAtomIntegrationTask)(
				articulationNodes + first,
				PxMin(articulationBatchSize, mArticulationCount - first),
				&mCachedPositionIterations, &mCachedVelocityIterations, mIslandManager);

			task->setContinuation(continuation);
			task->removeReference();
		}
	}

	//Kinematics: their pool entries start at index 1, hence the "+ 1" offset.
	const PxU32 kinematicBatchSize = 1024u;
	const PxNodeIndex* const kinematicNodes = islandSim.getActiveKinematics();

	for (PxU32 first = 0; first < mKinematicCount; first += kinematicBatchSize)
	{
		const PxU32 poolIndex = first + 1;

		void* taskMem = mFlushPool.allocate(sizeof(PxgSetupKinematicTask));
		PxgSetupKinematicTask* task = PX_PLACEMENT_NEW(taskMem, PxgSetupKinematicTask)(
			kinematicNodes + first, mActiveNodeIndex.begin(), PxMin(mKinematicCount - first, kinematicBatchSize),
			mIslandManager, poolIndex,
			mSolverBodyDataPool.begin() + poolIndex,
			mSolverBodySleepDataPool.begin() + poolIndex,
			mSolverTxIDataPool.begin() + poolIndex);

		task->setContinuation(continuation);
		task->removeReference();
	}

	PxgSimulationController* gpuSimController = static_cast<PxgSimulationController*>(mSimulationController);
	//const PxU32 numParticles = gpuSimController->getNbParticleSystems();

	PxgBodySimManager& bodySimManager = gpuSimController->getBodySimManager();
	void** bodySimsLL = bodySimManager.mBodies.begin();

	//Iteration counts gathered on this thread; merged into the cached maxima at the end.
	PxI32 maxPosIters = 0;
	PxI32 maxVelIters = 0;

	//Particle systems
	const PxU32 numParticleCores = mGpuParticleSystemCores.size();
	for (PxU32 coreIdx = 0; coreIdx < numParticleCores; ++coreIdx)
	{
		mGpuParticleSystemCores[coreIdx]->getMaxIterationCount(bodySimManager, maxPosIters, maxVelIters);
	}

	//Soft bodies
	{
		//Need to implement soft body
		const PxU32* softBodyNodeIndex = gpuSimController->getSoftBodyNodeIndex();

		const PxU32 nbActiveSoftbodies = bodySimManager.mActiveSoftbodies.size();
		const PxU32* activeSoftbodies = bodySimManager.mActiveSoftbodies.begin();

		for (PxU32 s = 0; s < nbActiveSoftbodies; ++s)
		{
			const PxU32 nodeIndex = softBodyNodeIndex[activeSoftbodies[s]];
			Dy::DeformableVolume* softBody = reinterpret_cast<Dy::DeformableVolume*>(bodySimsLL[nodeIndex]);

			const PxU16 packedIterations = softBody->getIterationCounts();

			maxPosIters = PxMax(PxI32(packedIterations & 0xff), maxPosIters);
			maxVelIters = PxMax(PxI32(packedIterations >> 8), maxVelIters);
		}
	}

	// FEM cloth
	{
		const PxU32* femClothNodeIndex = gpuSimController->getFEMClothNodeIndex();

		const PxU32 nbActiveFEMCloths = bodySimManager.mActiveFEMCloths.size();
		const PxU32* activeFEMCloths = bodySimManager.mActiveFEMCloths.begin();

		for (PxU32 c = 0; c < nbActiveFEMCloths; ++c)
		{
			const PxU32 nodeIndex = femClothNodeIndex[activeFEMCloths[c]];
			Dy::DeformableSurface* femCloth = reinterpret_cast<Dy::DeformableSurface*>(bodySimsLL[nodeIndex]);

			const PxU16 packedIterations = femCloth->getIterationCounts();

			//Only the position iteration count is taken into account for FEM cloth.
			maxPosIters = PxMax(PxI32(packedIterations & 0xff), maxPosIters);
			//maxVelIters = PxMax(PxI32(packedIterations >> 8), maxVelIters);
		}
	}

	PxAtomicMax(&mCachedPositionIterations, maxPosIters);
	PxAtomicMax(&mCachedVelocityIterations, maxVelIters);

	mGpuSolverCore->releaseContext();
}
|
|
|
|
void PxgGpuContext::doConstraintPrePrepCommon(physx::PxBaseTask* continuation)
|
|
{
|
|
mGpuSolverCore->acquireContext();
|
|
|
|
m1dConstraintBatchIndices.forceSize_Unsafe(0);
|
|
m1dConstraintBatchIndices.reserve(mIncrementalPartition.getNbConstraintBatches() + mNumStaticRigid1dConstraintBatches);
|
|
|
|
mContactConstraintBatchIndices.forceSize_Unsafe(0);
|
|
mContactConstraintBatchIndices.reserve(mIncrementalPartition.getNbContactBatches() + mNumStaticRigidContactBatches);
|
|
|
|
mArti1dConstraintBatchIndices.forceSize_Unsafe(0);
|
|
mArti1dConstraintBatchIndices.reserve(mIncrementalPartition.getNbArtiConstraintBatches() + mNumStaticArti1dConstraintBatches + mNumSelfArti1dConstraintBatches);
|
|
|
|
mArtiContactConstraintBatchIndices.forceSize_Unsafe(0);
|
|
mArtiContactConstraintBatchIndices.reserve(mIncrementalPartition.getNbArtiContactBatches() + mNumStaticArtiContactBatches + mNumSelfArtiContactBatches);
|
|
|
|
mIslandContextPool[0].mNumPositionIterations = mCachedPositionIterations;
|
|
mIslandContextPool[0].mNumVelocityIterations = mCachedVelocityIterations;
|
|
|
|
mNum1dConstraintBatches = (PxI32)mIncrementalPartition.getNbConstraintBatches();
|
|
mNumContactBatches = (PxI32)mIncrementalPartition.getNbContactBatches();
|
|
mNumArtiContactBatches = (PxI32)mIncrementalPartition.getNbArtiContactBatches();
|
|
mNumArti1dConstraintBatches = (PxI32)mIncrementalPartition.getNbArtiConstraintBatches();
|
|
|
|
PxgBodySimManager& bodyManager = getSimulationController()->getBodySimManager();
|
|
const PxU32 nbStaticSlabs = (PxMax(bodyManager.mMaxStaticRBJoints, bodyManager.mMaxStaticRBContacts) + mMaxNumStaticPartitions - 1) / mMaxNumStaticPartitions;
|
|
|
|
const PxU32 maxCombinedSlabPartitions = mIncrementalPartition.getCombinedSlabMaxNbPartitions();
|
|
|
|
mGpuSolverCore->gpuMemDmaUpBodyData(mSolverBodyDataPool, mSolverTxIDataPool, mIslandManager.getNbNodeHandles() + 1,
|
|
mNumConstraintBatches, mNumArticConstraintBatches, PxMax(1u, (mIncrementalPartition.getNbPartitions() + maxCombinedSlabPartitions - 1) / maxCombinedSlabPartitions),
|
|
nbStaticSlabs, mMaxNumStaticPartitions);
|
|
|
|
//Allocate enough space for the friction patches now that we know how many we need after constraint partitioning
|
|
{
|
|
PX_PROFILE_ZONE("GpuDynamics.allocateFrictionPatchStreams", 0);
|
|
mGpuSolverCore->allocateFrictionPatchStream(mNumContactBatches + mNumStaticRigidContactBatches, mNumArtiContactBatches + mNumStaticArtiContactBatches + mNumSelfArtiContactBatches);
|
|
}
|
|
|
|
mNum1DConstraintBlockPrepPool = (PxU32)mNum1dConstraintBatches;
|
|
|
|
const PxU32 nbConstraintsPerBatch = mIsTGS ? PxgCpuConstraintPrePrepTask::NbConstraintsPerTaskTGS : PxgCpuConstraintPrePrepTask::NbConstraintsPerTaskPGS; //Each task processed up to PxgCpuConstraintPrePrepTask::NbConstraintsPerTask constraints of a certain type
|
|
const PxU32 nbArtiConstraintsPerBatch = mIsTGS ? PxgCpuArtiConstraintPrePrepTask::NbConstraintsPerTaskTGS : PxgCpuArtiConstraintPrePrepTask::NbConstraintsPerTaskPGS;
|
|
|
|
PxU32 constraintBatchIndex = 0;
|
|
PxU32 contactBatchIndex = 0;
|
|
PxU32 articulationConstraintBatchIndex = mNum1dConstraintBatches;
|
|
PxU32 articulationContactBatchIndex = mNumContactBatches;
|
|
|
|
const PxU32 batchMask = PXG_BATCH_SIZE - 1;
|
|
|
|
mHasForceThresholds = mIncrementalPartition.hasForceThresholds();
|
|
|
|
const PxInt32ArrayPinned& startSlabIter = mIncrementalPartition.getStartSlabPerPartition();
|
|
const PxInt32ArrayPinned& articstartSlabIter = mIncrementalPartition.getArticStartSlabPerPartition();
|
|
|
|
PxgJointManager& jointManager = static_cast<PxgSimulationController*>(mSimulationController)->getJointManager();
|
|
const PxPinnedArray<PxgConstraintPrePrep>& rigidPreprepIter = jointManager.getGpuRigidJointPrePrep();
|
|
const PxPinnedArray<PxgConstraintPrePrep>& artiPreprepIter = jointManager.getGpuArtiJointPrePrep();
|
|
|
|
//The code below iterates over all partitions, producing tasks to fill in data.
|
|
|
|
//Running indices
|
|
|
|
PxU32 startIdx = 0; //Which partition to start at
|
|
PxU32 startBatchOffset = 0; //Batch offset within the partition
|
|
PxU32 startOffset = 0; //Constraint offset within the partition
|
|
PxU32 runningContactCount = 0; //The running count of the number of contact constraints that will be processed by the next task
|
|
PxU32 runningBatchCount = 0; //The running count of the number of batches that will be processed by the next task
|
|
|
|
{
|
|
PX_PROFILE_ZONE("Process Partitions", 0);
|
|
for (PxU32 i = 0; i < mIncrementalPartition.getNbPartitions(); ++i) // this is looping over "true" partitions, not the combined ones for the solver
|
|
{
|
|
const Partition& partition = mIncrementalPartition.getPartitionSlabs()[i / PXG_BATCH_SIZE]->mPartitions[i&(PXG_BATCH_SIZE - 1)];
|
|
const PxU32 nbContacts = partition.mPartitionIndices[PxgEdgeType::eCONTACT_MANAGER].size();
|
|
const PxU32 nbConstraints = partition.mPartitionIndices[PxgEdgeType::eCONSTRAINT].size();
|
|
const PxU32 nbArtiContacts = partition.mPartitionIndices[PxgEdgeType::eARTICULATION_CONTACT].size();
|
|
const PxU32 nbArtiConstraints = partition.mPartitionIndices[PxgEdgeType::eARTICULATION_CONSTRAINT].size();
|
|
//PxU32* constraintIds = partition.mPartitionIndices[IG::Edge::eCONSTRAINT].begin();
|
|
const PartitionIndices& constraintIds = partition.mPartitionIndices[PxgEdgeType::eCONSTRAINT];
|
|
const PartitionIndices& artiConstraintIds = partition.mPartitionIndices[PxgEdgeType::eARTICULATION_CONSTRAINT];
|
|
const PartitionIndices& artiContactIds = partition.mPartitionIndices[PxgEdgeType::eARTICULATION_CONTACT];
|
|
const PxU32 jointStartIndex = mIncrementalPartition.getJointStartIndices()[i];
|
|
|
|
PxU32 batchIndex = startSlabIter[i];
|
|
PxU32 localArticBatchIndex = articstartSlabIter[i];
|
|
PxU32 batchOffset = 0;
|
|
|
|
for (PxU32 a = 0; a < nbConstraints; a += nbConstraintsPerBatch)
|
|
{
|
|
PxU32 nbConstraintsToProcess = PxMin(nbConstraints - a, nbConstraintsPerBatch);
|
|
PxU32 nbBatches = (nbConstraintsToProcess + batchMask) / PXG_BATCH_SIZE;
|
|
|
|
PxgCpuConstraintPrePrepTask* task = (PxgCpuConstraintPrePrepTask*)mFlushPool.allocate(sizeof(PxgCpuConstraintPrePrepTask));
|
|
task = PX_PLACEMENT_NEW(task, PxgCpuConstraintPrePrepTask)(constraintIds, a, nbConstraintsToProcess,
|
|
mConstraintBatchHeaders + batchIndex, nbBatches, constraintBatchIndex, jointStartIndex + a, mConstraintUniqueIndices,
|
|
rigidPreprepIter.begin());
|
|
|
|
task->setContinuation(continuation);
|
|
task->removeReference();
|
|
|
|
for (PxU32 b = 0; b < nbBatches; ++b)
|
|
{
|
|
PxU32 val = batchIndex + b;
|
|
m1dConstraintBatchIndices.pushBack(val);
|
|
}
|
|
|
|
constraintBatchIndex += nbBatches;
|
|
batchIndex += nbBatches;
|
|
}
|
|
|
|
PxU32 remainingContacts = nbContacts;
|
|
|
|
PxU32 localOffset = 0;
|
|
|
|
//While there are constraints in this partition, process them in chunks of ~nbConstraintsPerBatch
|
|
while ((runningContactCount + remainingContacts) >= nbConstraintsPerBatch)
|
|
{
|
|
//We are aiming to process approximately 2048 constraints. However, to simplify the logic in the CPU PrePrep task,
|
|
//we actually can process a little more than that to fill up entire batches. Each batch contains 32 constraints.
|
|
PxU32 nbConstraintsFromThisPartition = nbConstraintsPerBatch - runningContactCount; //Number of constraints from this partition
|
|
PxU32 nbBatchesFromThisPartition = ((nbConstraintsFromThisPartition + batchMask) / PXG_BATCH_SIZE); //The number of batches from this partition (groups of 32 constraints)
|
|
|
|
//Round up the number of constraints from this partition to be full batches unless there are insufficient constraints in this partition to create a full batch
|
|
nbConstraintsFromThisPartition = PxMin((nbConstraintsFromThisPartition + batchMask)&(~(batchMask)), remainingContacts);
|
|
|
|
PxU32 totalBatches = runningBatchCount + nbBatchesFromThisPartition;
|
|
|
|
PxU32 nbConstraintsToProcess = runningContactCount + nbConstraintsFromThisPartition;
|
|
|
|
PxgCpuContactPrePrepTask* task = (PxgCpuContactPrePrepTask*)mFlushPool.allocate(sizeof(PxgCpuContactPrePrepTask));
|
|
task = PX_PLACEMENT_NEW(task, PxgCpuContactPrePrepTask)(mIncrementalPartition, startIdx, startOffset, nbConstraintsToProcess,
|
|
startSlabIter.begin(), startBatchOffset, mIncrementalPartition.getContactStartIndices().begin(),
|
|
mConstraintBatchHeaders, totalBatches, contactBatchIndex, mContactUniqueIndices,
|
|
mOutputIterator, mPatchStreamAllocators[mCurrentContactStream]->mStart,
|
|
mContactStreamAllocators[mCurrentContactStream]->mStart);
|
|
|
|
task->setContinuation(continuation);
|
|
task->removeReference();
|
|
|
|
//Update contact counts
|
|
remainingContacts -= nbConstraintsFromThisPartition;
|
|
localOffset += nbConstraintsFromThisPartition;
|
|
|
|
for (PxU32 b = 0; b < nbBatchesFromThisPartition; ++b)
|
|
{
|
|
PxU32 val = batchIndex + b;
|
|
mContactConstraintBatchIndices.pushBack(val);
|
|
}
|
|
//Update iteration indices in this partition
|
|
contactBatchIndex += totalBatches;
|
|
batchIndex += nbBatchesFromThisPartition;
|
|
batchOffset += nbBatchesFromThisPartition;
|
|
|
|
//Update global task iteration indices
|
|
startIdx = i;
|
|
startOffset = localOffset;
|
|
startBatchOffset = batchOffset;
|
|
runningContactCount = 0;
|
|
runningBatchCount = 0;
|
|
}
|
|
|
|
//We have remaining constraints. If so, sum them up and continue iterating...
|
|
PxU32 remainingBatches = (remainingContacts + batchMask) / PXG_BATCH_SIZE;
|
|
runningContactCount += remainingContacts;
|
|
runningBatchCount += remainingBatches;
|
|
|
|
for (PxU32 b = 0; b < remainingBatches; ++b)
|
|
{
|
|
PxU32 val = batchIndex + b;
|
|
mContactConstraintBatchIndices.pushBack(val);
|
|
}
|
|
|
|
//batchIndex += runningBatchCount;
|
|
|
|
PxU32 localArtiJointStartIndex = mIncrementalPartition.getArtiJointStartIndices()[i];
|
|
|
|
//constraintBatchIndex += contactBatchIndex;
|
|
|
|
//articulation constraints
|
|
for (PxU32 a = 0; a < nbArtiConstraints; a += nbArtiConstraintsPerBatch)
|
|
{
|
|
//each constraint is a batch
|
|
PxU32 nbConstraintsToProcess = PxMin(nbArtiConstraints - a, nbArtiConstraintsPerBatch);
|
|
PxU32 nbBatchesFromThisPartition = ((nbConstraintsToProcess + batchMask) / PXG_BATCH_SIZE); //The number of batches from this partition (groups of 32 constraints)
|
|
|
|
PxgCpuArtiConstraintPrePrepTask* task = (PxgCpuArtiConstraintPrePrepTask*)mFlushPool.allocate(sizeof(PxgCpuArtiConstraintPrePrepTask));
|
|
task = PX_PLACEMENT_NEW(task, PxgCpuArtiConstraintPrePrepTask)(artiConstraintIds, a, nbConstraintsToProcess,
|
|
mArticConstraintBatchHeaders + localArticBatchIndex, nbBatchesFromThisPartition, articulationConstraintBatchIndex, localArtiJointStartIndex, mArtiConstraintUniqueIndices,
|
|
artiPreprepIter.begin(), false);
|
|
|
|
localArtiJointStartIndex += nbConstraintsToProcess;
|
|
|
|
task->setContinuation(continuation);
|
|
task->removeReference();
|
|
|
|
for (PxU32 b = 0; b < nbBatchesFromThisPartition; ++b)
|
|
{
|
|
PxU32 val = localArticBatchIndex + b;
|
|
mArti1dConstraintBatchIndices.pushBack(val);
|
|
}
|
|
|
|
articulationConstraintBatchIndex += nbBatchesFromThisPartition;
|
|
localArticBatchIndex += nbBatchesFromThisPartition;
|
|
}
|
|
|
|
PxU32 localArtiContactStartIndex = mIncrementalPartition.getArtiContactStartIndices()[i];
|
|
//articulation contacts
|
|
for (PxU32 a = 0; a < nbArtiContacts; a += nbArtiConstraintsPerBatch)
|
|
{
|
|
//each contact is a batch
|
|
PxU32 nbContactsToProcess = PxMin(nbArtiContacts - a, nbArtiConstraintsPerBatch);
|
|
PxU32 nbBatchesFromThisPartition = ((nbContactsToProcess + batchMask) / PXG_BATCH_SIZE); //The number of batches from this partition (groups of 32 constraints)
|
|
|
|
PxgCpuArtiConstraintPrePrepTask* task = (PxgCpuArtiConstraintPrePrepTask*)mFlushPool.allocate(sizeof(PxgCpuArtiConstraintPrePrepTask));
|
|
task = PX_PLACEMENT_NEW(task, PxgCpuArtiConstraintPrePrepTask)(artiContactIds, a, nbContactsToProcess,
|
|
mArticConstraintBatchHeaders + localArticBatchIndex, nbBatchesFromThisPartition, articulationContactBatchIndex, localArtiContactStartIndex, mArtiContactUniqueIndices,
|
|
artiPreprepIter.begin(), true);
|
|
|
|
localArtiContactStartIndex += nbContactsToProcess;
|
|
|
|
task->setContinuation(continuation);
|
|
task->removeReference();
|
|
|
|
for (PxU32 b = 0; b < nbBatchesFromThisPartition; ++b)
|
|
{
|
|
PxU32 val = localArticBatchIndex + b;
|
|
mArtiContactConstraintBatchIndices.pushBack(val);
|
|
}
|
|
|
|
articulationContactBatchIndex += nbBatchesFromThisPartition;
|
|
localArticBatchIndex += nbBatchesFromThisPartition;
|
|
}
|
|
}
|
|
|
|
if (runningBatchCount > 0)
|
|
{
|
|
//There are remaining unprocessed contact constraints
|
|
PxgCpuContactPrePrepTask* task = (PxgCpuContactPrePrepTask*)mFlushPool.allocate(sizeof(PxgCpuContactPrePrepTask));
|
|
task = PX_PLACEMENT_NEW(task, PxgCpuContactPrePrepTask)(mIncrementalPartition, startIdx, startOffset, runningContactCount,
|
|
startSlabIter.begin(), startBatchOffset, mIncrementalPartition.getContactStartIndices().begin(),
|
|
mConstraintBatchHeaders, runningBatchCount, contactBatchIndex, mContactUniqueIndices,
|
|
mOutputIterator, mPatchStreamAllocators[mCurrentContactStream]->mStart,
|
|
mContactStreamAllocators[mCurrentContactStream]->mStart);
|
|
|
|
task->setContinuation(continuation);
|
|
task->removeReference();
|
|
}
|
|
}
|
|
|
|
doStaticArticulationConstraintPrePrep(continuation, articulationConstraintBatchIndex, articulationContactBatchIndex);
|
|
doStaticRigidConstraintPrePrep(continuation);
|
|
|
|
mGpuSolverCore->releaseContext();
|
|
}
|
|
|
|
void PxgGpuContext::doConstraintPrePrepGPUCommon(bool hasForceThresholds)
|
|
{
|
|
mLostTouchTask->removeReference();
|
|
|
|
const PxU32 nbCombinedSlabPartitions = mIncrementalPartition.getCombinedSlabNbPartitions();
|
|
|
|
{
|
|
mConstraintsPerPartition.forceSize_Unsafe(0);
|
|
if (mConstraintsPerPartition.capacity() < nbCombinedSlabPartitions)
|
|
mConstraintsPerPartition.reserve(2 * nbCombinedSlabPartitions);
|
|
|
|
mArtiConstraintsPerPartition.forceSize_Unsafe(0);
|
|
if (mArtiConstraintsPerPartition.capacity() < nbCombinedSlabPartitions)
|
|
mArtiConstraintsPerPartition.reserve(2 * nbCombinedSlabPartitions);
|
|
|
|
for (PxU32 a = 0; a < nbCombinedSlabPartitions; ++a)
|
|
{
|
|
mConstraintsPerPartition.pushBack(mIncrementalPartition.getCSlabAccumulatedPartitionCount(a));
|
|
mArtiConstraintsPerPartition.pushBack(mIncrementalPartition.getCSlabAccumulatedArtiPartitionCount(a));
|
|
}
|
|
}
|
|
|
|
mIslandContextPool->mStartPartitionIndex = 0;
|
|
mIslandContextPool->mNumPartitions = nbCombinedSlabPartitions;
|
|
mIslandContextPool->mBatchStartIndex = 0;
|
|
mIslandContextPool->mBatchCount = mIncrementalPartition.getNbConstraintBatches() + mIncrementalPartition.getNbContactBatches();
|
|
|
|
mIslandContextPool->mArtiBatchStartIndex = 0;
|
|
mIslandContextPool->mArtiBatchCount = mIncrementalPartition.getNbArtiConstraintBatches() + mIncrementalPartition.getNbArtiContactBatches();
|
|
//mIslandContextPool->mStaticArtiBatchCount = getSimulationController()->getBodySimManager().mTotalArticJoints + getSimulationController()->getBodySimManager().mTotalArticContacts;
|
|
|
|
PxgJointManager& jointManager = getSimulationController()->getJointManager();
|
|
const PxU32 gpuRigidJointSize = jointManager.getGpuNbRigidConstraints();
|
|
const PxU32 cpuRigidJointSize = jointManager.getCpuNbRigidConstraints();
|
|
const PxU32 gpuArtiJointSize = jointManager.getGpuNbArtiConstraints();
|
|
const PxU32 cpuArtiJointSize = jointManager.getCpuNbArtiConstraints();
|
|
PxgConstraintPrePrepData ppData;
|
|
ppData.nbGpuRigidJoints = gpuRigidJointSize;
|
|
ppData.nbTotalRigidJoints = gpuRigidJointSize + cpuRigidJointSize;
|
|
ppData.nbGpuArtiJoints = gpuArtiJointSize;
|
|
ppData.nbTotalArtiJoints = gpuArtiJointSize + cpuArtiJointSize;
|
|
|
|
ppData.numContactBatches = PxU32(mNumContactBatches);
|
|
ppData.num1dConstraintBatches = PxU32(mNum1dConstraintBatches);
|
|
ppData.numStaticContactBatches = PxU32(mNumStaticRigidContactBatches);
|
|
ppData.numStatic1dConstraintBatches = PxU32(mNumStaticRigid1dConstraintBatches);
|
|
|
|
ppData.numArtiContactsBatches = PxU32(mNumArtiContactBatches);
|
|
ppData.numArti1dConstraintBatches = PxU32(mNumArti1dConstraintBatches);
|
|
ppData.numArtiStaticContactsBatches = PxU32(mNumStaticArtiContactBatches);
|
|
ppData.numArtiStatic1dConstraintBatches = PxU32(mNumStaticArti1dConstraintBatches);
|
|
ppData.numArtiSelfContactsBatches = PxU32(mNumSelfArtiContactBatches);
|
|
ppData.numArtiSelf1dConstraintBatches = PxU32(mNumSelfArti1dConstraintBatches);
|
|
|
|
ppData.artiStaticConstraintBatchOffset = PxU32(mArtiStaticConstraintBatchOffset);
|
|
ppData.artiStaticContactBatchOffset = PxU32(mArtiStaticContactBatchOffset);
|
|
|
|
ppData.contactUniqueIndices = mContactUniqueIndices;
|
|
ppData.constraintUniqueIndices = mConstraintUniqueIndices;
|
|
ppData.artiContactUniqueIndices = mArtiContactUniqueIndices;
|
|
ppData.artiConstraintUniqueindices = mArtiConstraintUniqueIndices;
|
|
ppData.artiStaticConstraintUniqueIndices = mArtiStaticConstraintUniqueIndices;
|
|
ppData.artiStaticContactUniqueIndices = mArtiStaticContactUniqueIndices;
|
|
|
|
ppData.artiStaticConstraintStartIndex = mArtiStaticConstraintStartIndex;
|
|
ppData.artiStaticConstraintCount = mArtiStaticConstraintCount;
|
|
ppData.artiStaticContactStartIndex = mArtiStaticContactStartIndex;
|
|
ppData.artiStaticContactCount = mArtiStaticContactCount;
|
|
|
|
ppData.constraint1DBatchIndices = m1dConstraintBatchIndices.begin();
|
|
ppData.constraintContactBatchIndices = mContactConstraintBatchIndices.begin();
|
|
ppData.artiConstraint1dBatchindices = mArti1dConstraintBatchIndices.begin();
|
|
ppData.artiConstraintContactBatchIndices = mArtiContactConstraintBatchIndices.begin();
|
|
|
|
PxgConstantData cData;
|
|
cData.dt = mDt;
|
|
cData.invDtF32 = mInvDt;
|
|
cData.bounceThresholdF32 = mBounceThreshold;
|
|
cData.frictionOffsetThreshold = mFrictionOffsetThreshold;
|
|
cData.correlationDistance = mCorrelationDistance;
|
|
cData.ccdMaxSeparation = mCCDSeparationThreshold;
|
|
cData.biasCoefficient = mIslandContextPool->mBiasCoefficient;
|
|
cData.gravity = mGravity;
|
|
|
|
PxgBodySimManager& bodySimManager = getSimulationController()->getBodySimManager();
|
|
|
|
PxgPartitionData pData;
|
|
pData.constraintsPerPartition = mConstraintsPerPartition.begin();
|
|
pData.numConstraintsPerPartition = mConstraintsPerPartition.size();
|
|
pData.artiConstraintsPerPartition = mArtiConstraintsPerPartition.begin();
|
|
pData.numArtiConstraintsPerPartition = mArtiConstraintsPerPartition.size();
|
|
pData.numTotalContacts = mIncrementalPartition.getTotalContacts();
|
|
pData.numTotalStaticConstraints = bodySimManager.mTotalStaticRBJoints;
|
|
pData.numTotalStaticContacts = bodySimManager.mTotalStaticRBContacts;
|
|
pData.numTotalConstraints = mIncrementalPartition.getTotalConstraints();
|
|
pData.numTotalArtiContacts = mIncrementalPartition.getTotalArticulationContacts();
|
|
pData.numTotalArtiConstraints = mIncrementalPartition.getTotalArticulationConstraints();
|
|
pData.numTotalArtiStaticContacts = bodySimManager.mTotalStaticArticContacts;
|
|
pData.numTotalArtiStaticConstraints = bodySimManager.mTotalStaticArticJoints;
|
|
pData.numTotalArtiSelfContacts = bodySimManager.mTotalSelfArticContacts;
|
|
pData.numTotalArtiSelfConstraints = bodySimManager.mTotalSelfArticJoints;
|
|
pData.artiStaticConstraintBatchOffset = mArtiStaticConstraintBatchOffset;
|
|
pData.artiStaticContactBatchOffset = mArtiStaticContactBatchOffset;
|
|
|
|
mIslandContextPool->mStaticArtiBatchCount = mNumArtiStaticConstraintBatches;
|
|
mIslandContextPool->mSelfArtiBatchCount = mNumArtiSelfConstraintBatches;
|
|
mIslandContextPool->mStaticRigidBatchCount = mNumRigidStaticConstraintBatches;
|
|
|
|
const PxU32 maxCombinedSlabPartitions = mIncrementalPartition.getCombinedSlabMaxNbPartitions();
|
|
const PxU32 nbSlabs = PxMax(1u, (mIncrementalPartition.getNbPartitions() + maxCombinedSlabPartitions - 1) / maxCombinedSlabPartitions);
|
|
const PxU32 nbPartitions = PxMin(mIncrementalPartition.getNbPartitions(), maxCombinedSlabPartitions);
|
|
|
|
mGpuArticulationCore->allocDeltaVBuffer(nbSlabs, nbPartitions, mGpuSolverCore->getStream());
|
|
|
|
mGpuSolverCore->gpuMemDMAUp(*mPinnedMemoryAllocator, ppData, mSolverBodyPool.size(),
|
|
mConstraintBatchHeaders, mIslandContextPool, mNumIslandContextPool, pData,
|
|
mNumConstraintBatches, mNumRigidStaticConstraintBatches, mNumArticConstraintBatches, mNumArtiStaticConstraintBatches, mNumArtiSelfConstraintBatches, cData,
|
|
PXG_MAX_NUM_POINTS_PER_CONTACT_PATCH * (mNumContactBatches + mNumStaticRigidContactBatches), 4u * (mNumContactBatches + mNumStaticRigidContactBatches),
|
|
PXG_MAX_NUM_POINTS_PER_CONTACT_PATCH * (mNumArtiContactBatches + mNumStaticArtiContactBatches + mNumSelfArtiContactBatches), 4u * (mNumArtiContactBatches + mNumStaticArtiContactBatches + mNumSelfArtiContactBatches),
|
|
mTotalEdges, mTotalPreviousEdges,
|
|
nbSlabs,
|
|
maxCombinedSlabPartitions, mEnableStabilization, mPatchStreamAllocators[mCurrentContactStream]->mStart, mContactStreamAllocators[mCurrentContactStream]->mStart,
|
|
mForceStreamAllocator->mStart, mOutputIterator, mSolverBodyPool.size() - (mKinematicCount + 1), mKinematicCount + 1, mArticulationCount,
|
|
reinterpret_cast<Cm::UnAlignedSpatialVector*>(mGpuArticulationCore->getDeferredZ()),
|
|
reinterpret_cast<PxU32*>(mGpuArticulationCore->getArticulationDirty()),
|
|
reinterpret_cast<uint4*>(mGpuArticulationCore->getArticulationSlabMask()),
|
|
mGPUShapeInteractions, mGPURestDistances, mGPUTorsionalData, mArtiStaticContactIndices.begin(), mArtiStaticContactIndices.size(),
|
|
mArtiStaticJointIndices.begin(), mArtiStaticJointIndices.size(), mArtiStaticContactCounts.begin(), mArtiStaticJointCounts.begin(),
|
|
mArtiSelfContactIndices.begin(), mArtiSelfContactIndices.size(),
|
|
mArtiSelfJointIndices.begin(), mArtiSelfJointIndices.size(), mArtiSelfContactCounts.begin(), mArtiSelfJointCounts.begin(),
|
|
mRigidStaticContactIndices.begin(), mRigidStaticContactIndices.size(), mRigidStaticJointIndices.begin(), mRigidStaticJointIndices.size(),
|
|
mRigidStaticContactCounts.begin(), mRigidStaticJointCounts.begin(), mLengthScale, hasForceThresholds);
|
|
|
|
//Make sure that the GPU articulation work has completed now...
|
|
mGpuArticulationCore->syncUnconstrainedVelocities();
|
|
mGpuArticulationCore->layoutDeltaVBuffer(nbSlabs, nbPartitions, mGpuSolverCore->getStream());
|
|
|
|
mGpuArticulationCore->createStaticContactAndConstraintsBatch(mArticulationCount);
|
|
|
|
mGpuSolverCore->constraintPrePrepParallel(mNumConstraintBatches + mNumRigidStaticConstraintBatches + mNumArticConstraintBatches + mNumArtiStaticConstraintBatches + mNumArtiSelfConstraintBatches, gpuRigidJointSize + gpuArtiJointSize,
|
|
mIslandContextPool->mBodyCount);
|
|
}
|
|
|
|
void PxgCpuJointPrePrepTask::runInternal()
|
|
{
|
|
PxU32 endIndex = mStartIndex + mNbToProcess;
|
|
|
|
Px1DConstraint tempRows[Dy::MAX_CONSTRAINT_ROWS];
|
|
|
|
for (PxU32 i = mStartIndex; i < endIndex; ++i)
|
|
{
|
|
const Dy::Constraint* constraint = mConstraints[i];
|
|
|
|
const PxConstraintSolverPrep solverPrep = constraint->solverPrep;
|
|
|
|
if (!solverPrep)
|
|
continue;
|
|
|
|
const PxTransform& pose0 = (constraint->body0 ? constraint->body0->getPose() : PxTransform(PxIdentity));
|
|
const PxTransform& pose1 = (constraint->body1 ? constraint->body1->getPose() : PxTransform(PxIdentity));
|
|
const void* constantBlock = constraint->constantBlock;
|
|
|
|
PxgConstraintData& data = mConstraintData[i];
|
|
//Px1DConstraint* rows = &rowIter[i*Dy::MAX_CONSTRAINT_ROWS];
|
|
|
|
PxMemZero(tempRows, sizeof(Px1DConstraint)*Dy::MAX_CONSTRAINT_ROWS);
|
|
|
|
for (PxU32 j = 0; j < Dy::MAX_CONSTRAINT_ROWS; j++)
|
|
{
|
|
Px1DConstraint& c = tempRows[j];
|
|
c.minImpulse = -PX_MAX_REAL;
|
|
c.maxImpulse = PX_MAX_REAL;
|
|
}
|
|
|
|
PxConstraintInvMassScale ims(1.0f, 1.0f, 1.0f, 1.0f);
|
|
PxVec3p ra, rb;
|
|
PxVec3p body0WorldOffset(0.0f);
|
|
|
|
//TAG:solverprepcall
|
|
const PxU32 numRows = (constraint->flags & PxConstraintFlag::eDISABLE_CONSTRAINT) ? 0 :(*solverPrep)(tempRows,
|
|
body0WorldOffset,
|
|
Dy::MAX_CONSTRAINT_ROWS,
|
|
ims,
|
|
constantBlock,
|
|
pose0, pose1, !!(constraint->flags & PxConstraintFlag::eENABLE_EXTENDED_LIMITS), ra, rb);
|
|
|
|
data.mNumRows_Flags_StartIndex.x = numRows;
|
|
|
|
if (numRows == 0)
|
|
continue;
|
|
|
|
ra -= pose0.p;
|
|
rb -= pose1.p;
|
|
|
|
data.mInvMassScale.linear0 = ims.linear0;
|
|
data.mInvMassScale.angular0 = ims.angular0;
|
|
data.mInvMassScale.linear1 = ims.linear1;
|
|
data.mInvMassScale.angular1 = ims.angular1;
|
|
data.mRaWorld_linBreakForceW = make_float4(ra.x, ra.y, ra.z, constraint->linBreakForce);
|
|
data.mRbWorld_angBreakForceW = make_float4(rb.x, rb.y, rb.z, constraint->angBreakForce);
|
|
|
|
data.mNumRows_Flags_StartIndex.y = constraint->flags;
|
|
|
|
PxI32 startRowIndex = PxAtomicAdd(mRowCounts, PxI32(numRows)) - PxI32(numRows);
|
|
|
|
PxMemCopy(mConstraintRows + startRowIndex, tempRows, sizeof(Px1DConstraint) * numRows);
|
|
|
|
data.mNumRows_Flags_StartIndex.z = mGpuJointOffset + startRowIndex;
|
|
}
|
|
}
|
|
|
|
// Spawns PxgCpuJointPrePrepTask instances for all CPU-side joints, first the rigid-body ones and then the
// articulation ones, in chunks of at most 128 joints per task. Each task is allocated from mFlushPool,
// chained to `continuation` and has its reference removed right away.
void PxgGpuContext::cpuJointPrePrepTask(physx::PxBaseTask* continuation)
{
	PxgJointManager& jointManager = getSimulationController()->getJointManager();

	// AD: This could also be skipped with direct-GPU API, but at this point the constraints are already partitioned and I
	// cannot figure out how to remove the CPU joints from there again.

	const PxArray<const Dy::Constraint*>& rigidJoints = jointManager.getCpuRigidConstraints();
	const PxArray<const Dy::Constraint*>& artiJoints = jointManager.getCpuArtiConstraints();

	const PxU32 rigidJointCount = rigidJoints.size();
	const PxU32 artiJointCount = artiJoints.size();

	const PxU32 jointsPerTask = 128u; // PT: TODO: revisit

	// Rigid-body joints. The offset passed to the task is the GPU rigid joint count times the max rows per joint.
	const PxU32 rigidRowOffset = jointManager.getGpuNbRigidConstraints() * Dy::MAX_CONSTRAINT_ROWS;

	for (PxU32 first = 0; first < rigidJointCount; first += jointsPerTask)
	{
		const PxU32 count = PxMin(rigidJointCount - first, jointsPerTask);

		void* storage = mFlushPool.allocate(sizeof(PxgCpuJointPrePrepTask));
		PxgCpuJointPrePrepTask* task = PX_PLACEMENT_NEW(storage, PxgCpuJointPrePrepTask)(*getSimulationController(), first, count, rigidRowOffset,
			rigidJoints.begin(), jointManager.getCpuRigidConstraintData().begin(), jointManager.getCpuRigidConstraintRows().begin(),
			&jointManager.mNbCpuRigidConstraintRows);

		task->setContinuation(continuation);
		task->removeReference();
	}

	// Articulation joints: same scheme with the articulation arrays and row counter.
	const PxU32 artiRowOffset = jointManager.getGpuNbArtiConstraints() * Dy::MAX_CONSTRAINT_ROWS;

	for (PxU32 first = 0; first < artiJointCount; first += jointsPerTask)
	{
		const PxU32 count = PxMin(artiJointCount - first, jointsPerTask);

		void* storage = mFlushPool.allocate(sizeof(PxgCpuJointPrePrepTask));
		PxgCpuJointPrePrepTask* task = PX_PLACEMENT_NEW(storage, PxgCpuJointPrePrepTask)(*getSimulationController(), first, count, artiRowOffset,
			artiJoints.begin(), jointManager.getCpuArtiConstraintData().begin(),
			jointManager.getCpuArtiConstraintRows().begin(), &jointManager.mNbCpuArtiConstraintRows);

		task->setContinuation(continuation);
		task->removeReference();
	}
}
|
|
|
|
// This class figures out the max iteration counts for all actors,
|
|
// and prepares some data for kinematics.
|
|
void PxgCpuPreIntegrationTask::runInternal()
|
|
{
|
|
mContext.doPreIntegrationTaskCommon(mCont);
|
|
}
|
|
|
|
void PxgCpuContactPrePrepTask::runInternal()
|
|
{
|
|
PX_PROFILE_ZONE("GpuDynamics.PxgCpuContactPrePrepTask", 0);
|
|
|
|
const PxU32 nbToProcess = mNumBatches;
|
|
PxU32 nbProcessed = 0;
|
|
PxU32 partitionIdx = mPartitionIndex;
|
|
PxU32 partitionStartIdx = mStartIndexWithinPartition;
|
|
PxU32 startSlabOffset = mStartSlabOffset;
|
|
|
|
PxU32 workUnitIndex = mWorkUnitStartIndex;
|
|
|
|
while (nbProcessed < nbToProcess)
|
|
{
|
|
//Extract current partition
|
|
const Partition& partition = mPartition.getPartitionSlabs()[partitionIdx / PXG_BATCH_SIZE]->mPartitions[partitionIdx&(PXG_BATCH_SIZE - 1)];
|
|
//Get edgeIndices corresponding to this partition offset by partitionStartIdx
|
|
const PartitionIndices& edgeIds = partition.mPartitionIndices[PxgEdgeType::eCONTACT_MANAGER];// +partitionStartIdx;
|
|
|
|
//Factor in joint constraints to work out offsets in this partition. As this task can now process multiple partitions,
|
|
//it is easiest just to compute them again here
|
|
{
|
|
const PxU32 nbConstraints = partition.mPartitionIndices[PxgEdgeType::eCONSTRAINT].size();
|
|
const PxU32 nbBatches = (nbConstraints + 31u) / PXG_BATCH_SIZE;
|
|
startSlabOffset += nbBatches;
|
|
}
|
|
|
|
const PxU32 batchIndex = mStartSlabIter[partitionIdx] + startSlabOffset;
|
|
const PxU32 uniqueStartIndex = mContactStartIndices[partitionIdx] + partitionStartIdx;
|
|
|
|
//The number we process in this partition is equal to the smaller of (nbToProcess - nbProcessed) and (size of partition - startOffsetInPartition).
|
|
const PxU32 nbRemaining = partition.mPartitionIndices[PxgEdgeType::eCONTACT_MANAGER].size() - partitionStartIdx;
|
|
//Convert from constraints to batches
|
|
const PxU32 nbBatchesToProcess = PxMin((nbToProcess - nbProcessed), (nbRemaining + 31) / PXG_BATCH_SIZE);
|
|
|
|
PxU32 currentEdgeIndex = 0;
|
|
|
|
for (PxU32 a = 0; a < nbBatchesToProcess; ++a)
|
|
{
|
|
const PxU32 descStride = PxMin(nbRemaining - currentEdgeIndex, PXG_BATCH_SIZE);
|
|
|
|
PxgConstraintBatchHeader& batchHeader = mBatchHeaders[a + batchIndex];
|
|
batchHeader.constraintType = PxgSolverConstraintDesc::eCONTACT;
|
|
batchHeader.mDescStride = PxU16(descStride);
|
|
batchHeader.mConstraintBatchIndex = workUnitIndex++;
|
|
batchHeader.mStartPartitionIndex = uniqueStartIndex + a * PXG_BATCH_SIZE;
|
|
batchHeader.mask = 0xFFFFFFFF; //Unused
|
|
|
|
#if PXG_CONTACT_VALIDATION
|
|
validateContactPairs(a, a + descStride, edgeIds + a, mNpIds, mOutputIterator, mBaseContactPatch, mBaseContactPointer);
|
|
#endif
|
|
currentEdgeIndex += descStride;
|
|
}
|
|
|
|
for (PxU32 i = 0; i < nbRemaining; ++i)
|
|
{
|
|
const PxU32 uniqueId = edgeIds[i + partitionStartIdx];
|
|
mPinnedEdgeIds[uniqueStartIndex + i] = uniqueId;
|
|
}
|
|
|
|
nbProcessed += nbBatchesToProcess;
|
|
partitionIdx++;
|
|
partitionStartIdx = 0;
|
|
startSlabOffset = 0;
|
|
|
|
//PxMemCopy(mPinnedEdgeIds + uniqueStartIndex, edgeIds, sizeof(PxU32) * nbRemaining);
|
|
}
|
|
}
|
|
|
|
void PxgGpuContext::allocateTempPinnedSolverMemoryCommon()
|
|
{
|
|
// AD: two stages.
|
|
// 1. first figure out how much we need. Allocate PxMax(sizeNeeded, PxGpuDynamicsMemoryConfig::tempBufferCapacity).
|
|
// 2. suballocate and set the pointers.
|
|
|
|
// AD: old comment that moved here when outlining into a separate function. I don't know how relevant this still is.
|
|
// KS - this may be over-allocating because, at this stage, we only know (1) how many articulation static contacts
|
|
// we have in total, (2) how many is the max a given articulation has and (3) how many articulations we have.
|
|
// We allocate the minimum of maxBatches * numArticulations, totalContacts. We will likely require less than
|
|
// both of these counts, but this provides us with an upper-bound...
|
|
|
|
// this code operates under the assumption that we only have 1 solver island on GPU.
|
|
|
|
PxU64 sizeNeeded = 0;
|
|
const PxU32 alignment = 128; // GPU cache line size.
|
|
|
|
const PxU32 totalIslands = 1;
|
|
const PxU64 totalIslandsAllocationSize = (totalIslands * sizeof(PxgIslandContext)) + alignment;
|
|
sizeNeeded += totalIslandsAllocationSize;
|
|
|
|
mNumConstraintBatches = mIncrementalPartition.getNbConstraintBatches() + mIncrementalPartition.getNbContactBatches();
|
|
|
|
PxgBodySimManager& bodyManager = getSimulationController()->getBodySimManager();
|
|
const PxU32 maxStaticRigidJoints = bodyManager.mMaxStaticRBJoints;
|
|
const PxU32 maxStaticRigidContacts = bodyManager.mMaxStaticRBContacts;
|
|
const PxU32 nbRigidBatches = (mBodyCount + PXG_BATCH_SIZE - 1) / PXG_BATCH_SIZE;
|
|
const PxU32 totalStaticRigidContacts = bodyManager.mTotalStaticRBContacts;
|
|
const PxU32 totalStaticRigidJoints = bodyManager.mTotalStaticRBJoints;
|
|
|
|
mNumStaticRigidContactBatches = PxMin(maxStaticRigidContacts * nbRigidBatches, totalStaticRigidContacts);
|
|
mNumStaticRigid1dConstraintBatches = PxMin(maxStaticRigidJoints * nbRigidBatches, totalStaticRigidJoints);
|
|
mNumRigidStaticConstraintBatches = (mNumStaticRigidContactBatches + mNumStaticRigid1dConstraintBatches);
|
|
|
|
mNumArticConstraintBatches = mIncrementalPartition.getNbArtiConstraintBatches() + mIncrementalPartition.getNbArtiContactBatches();
|
|
|
|
const PxU32 nbArticBatches = (mArticulationCount + PXG_BATCH_SIZE - 1) / PXG_BATCH_SIZE;
|
|
const PxU32 maxStaticArticJoints = bodyManager.mMaxStaticArticJoints;
|
|
const PxU32 maxStaticArticContacts = bodyManager.mMaxStaticArticContacts;
|
|
const PxU32 totalStaticArticulationContacts = bodyManager.mTotalStaticArticContacts;
|
|
const PxU32 totalStaticArticulationJoints = bodyManager.mTotalStaticArticJoints;
|
|
|
|
mNumStaticArtiContactBatches = PxMin(maxStaticArticContacts * nbArticBatches, totalStaticArticulationContacts);
|
|
mNumStaticArti1dConstraintBatches = PxMin(maxStaticArticJoints * nbArticBatches, totalStaticArticulationJoints);
|
|
mNumArtiStaticConstraintBatches = (mNumStaticArtiContactBatches + mNumStaticArti1dConstraintBatches);
|
|
|
|
const PxU32 maxSelfArticJoints = bodyManager.mMaxSelfArticJoints;
|
|
const PxU32 maxSelfArticContacts = bodyManager.mMaxSelfArticContacts;
|
|
const PxU32 totalSelfArticulationContacts = bodyManager.mTotalSelfArticContacts;
|
|
const PxU32 totalSelfArticulationJoints = bodyManager.mTotalSelfArticJoints;
|
|
|
|
mNumSelfArtiContactBatches = PxMin(maxSelfArticContacts * nbArticBatches, totalSelfArticulationContacts);
|
|
mNumSelfArti1dConstraintBatches = PxMin(maxSelfArticJoints * nbArticBatches, totalSelfArticulationJoints);
|
|
mNumArtiSelfConstraintBatches = (mNumSelfArtiContactBatches + mNumSelfArti1dConstraintBatches);
|
|
|
|
const PxU64 allocationSizeConstraintBatchHeader = sizeof(PxgConstraintBatchHeader) * (mNumConstraintBatches + mNumRigidStaticConstraintBatches + mNumArticConstraintBatches + mNumArtiStaticConstraintBatches + mNumArtiSelfConstraintBatches);
|
|
const PxU64 allocationSizeConstraintBatchHeaderAligned = allocationSizeConstraintBatchHeader + alignment;
|
|
sizeNeeded += allocationSizeConstraintBatchHeaderAligned;
|
|
|
|
const PxU32 totalJoints = mIncrementalPartition.getTotalConstraints();
|
|
const PxU32 totalContacts = mIncrementalPartition.getTotalContacts();
|
|
const PxU32 totalArticulationJoints = mIncrementalPartition.getTotalArticulationConstraints();
|
|
const PxU32 totalArticulationContacts = mIncrementalPartition.getTotalArticulationContacts();
|
|
|
|
//Unique Indices layout is joint->contact->artiJoint->artiContact
|
|
const PxU64 allocationSizeUniqueIndices = (totalJoints + totalContacts + totalArticulationJoints
|
|
+ totalArticulationContacts + totalStaticArticulationJoints + totalStaticArticulationContacts + totalSelfArticulationContacts
|
|
+ totalSelfArticulationJoints + totalStaticRigidContacts + totalStaticRigidJoints) * sizeof(PxU32);
|
|
const PxU64 allocationSizeUniqueIndicesAligned = allocationSizeUniqueIndices + alignment;
|
|
sizeNeeded += allocationSizeUniqueIndicesAligned;
|
|
|
|
const PxU64 allocationSizeArticulationCount = mArticulationCount * 4 * sizeof(PxU32);
|
|
const PxU64 allocationSizeArticulationCountAligned = allocationSizeArticulationCount + alignment;
|
|
sizeNeeded += allocationSizeArticulationCountAligned;
|
|
|
|
const PxU64 allocationSizeBodyCount = mBodyCount * 2 * sizeof(PxU32);
|
|
const PxU64 allocationSizeBodyCountAligned = allocationSizeBodyCount + alignment;
|
|
sizeNeeded += allocationSizeBodyCountAligned;
|
|
|
|
// descriptors are part of the solvercore
|
|
sizeNeeded += mGpuSolverCore->getDescriptorsAllocationSize();
|
|
|
|
// phase 2 - actually allocate the memory
|
|
mPinnedMemoryAllocator->reserveAndGrow(static_cast<PxU32>(sizeNeeded));
|
|
|
|
#if PX_ENABLE_SIM_STATS
|
|
mSimStats.mGpuDynamicsTempBufferCapacity = PxMax(sizeNeeded,mSimStats.mGpuDynamicsTempBufferCapacity);
|
|
#else
|
|
PX_CATCH_UNDEFINED_ENABLE_SIM_STATS
|
|
#endif
|
|
|
|
mIslandContextPool = reinterpret_cast<PxgIslandContext*>(mPinnedMemoryAllocator->allocate(totalIslands * sizeof(PxgIslandContext), alignment));
|
|
|
|
mConstraintBatchHeaders = reinterpret_cast<PxgConstraintBatchHeader*>(mPinnedMemoryAllocator->allocate(allocationSizeConstraintBatchHeader, alignment));
|
|
mArticConstraintBatchHeaders = mConstraintBatchHeaders + mNumConstraintBatches;
|
|
|
|
mConstraintUniqueIndices = reinterpret_cast<PxU32*>(mPinnedMemoryAllocator->allocate(allocationSizeUniqueIndices, alignment));
|
|
mRigidStaticConstraintUniqueIndices = mConstraintUniqueIndices + totalJoints;
|
|
mArtiConstraintUniqueIndices = mRigidStaticConstraintUniqueIndices + totalStaticRigidJoints;
|
|
mArtiStaticConstraintUniqueIndices = mArtiConstraintUniqueIndices + totalArticulationJoints;
|
|
mArtiSelfConstraintUniqueIndices = mArtiStaticConstraintUniqueIndices + totalStaticArticulationJoints;
|
|
|
|
mContactUniqueIndices = mArtiSelfConstraintUniqueIndices + totalSelfArticulationJoints;
|
|
mRigidStaticContactUniqueIndices = mContactUniqueIndices + totalContacts;
|
|
mArtiContactUniqueIndices = mRigidStaticContactUniqueIndices + totalStaticRigidContacts;
|
|
mArtiStaticContactUniqueIndices = mArtiContactUniqueIndices + totalArticulationContacts;
|
|
mArtiSelfContactUniqueIndices = mArtiStaticContactUniqueIndices + totalStaticArticulationContacts;
|
|
|
|
mArtiStaticConstraintStartIndex = reinterpret_cast<PxU32*>(mPinnedMemoryAllocator->allocate(allocationSizeArticulationCount, alignment));
|
|
mArtiStaticConstraintCount = mArtiStaticConstraintStartIndex + mArticulationCount;
|
|
mArtiStaticContactStartIndex = mArtiStaticConstraintCount + mArticulationCount;
|
|
mArtiStaticContactCount = mArtiStaticContactStartIndex + mArticulationCount;
|
|
|
|
mRigidStaticConstraintStartIndex = reinterpret_cast<PxU32*>(mPinnedMemoryAllocator->allocate(allocationSizeBodyCount, alignment));
|
|
mRigidStaticConstraintCount = mRigidStaticConstraintStartIndex + mBodyCount;
|
|
|
|
mGpuSolverCore->allocatePinnedDescriptors(*mPinnedMemoryAllocator);
|
|
}
|
|
|
|
// PT: TODO: un-indent all of the above
|
|
|
|
void PxgGpuContext::doConstraintPrepGPU()
|
|
{
|
|
PX_PROFILE_ZONE("GpuDynamics.ConstraintPrep", 0);
|
|
/**
|
|
* Things to do in here:
|
|
* (1) constraint prep on GPU
|
|
*/
|
|
|
|
mGpuSolverCore->resetVelocities(mIsTGS);
|
|
|
|
mGpuSolverCore->nonRigidConstraintPrepare(mArticulationCount);
|
|
|
|
mGpuSolverCore->jointConstraintPrepareParallel(PxU32(mNum1dConstraintBatches + mNumStaticRigid1dConstraintBatches));
|
|
mGpuSolverCore->contactConstraintPrepareParallel(PxU32(mNumContactBatches + mNumStaticRigidContactBatches));
|
|
|
|
mGpuSolverCore->artiJointConstraintPrepare(PxU32(mNumArti1dConstraintBatches + mNumStaticArti1dConstraintBatches + mNumSelfArti1dConstraintBatches));
|
|
mGpuSolverCore->artiContactConstraintPrepare(PxU32(mNumArtiContactBatches + mNumStaticArtiContactBatches + mNumSelfArtiContactBatches));
|
|
|
|
mGpuArticulationCore->precomputeDependencies(PxMin(mIncrementalPartition.getNbPartitions(), mIncrementalPartition.getCombinedSlabMaxNbPartitions()));
|
|
}
|
|
|
|
void PxgGpuContext::doPreIntegrationGPU()
|
|
{
|
|
const PxU32 offset = 1 + mKinematicCount;
|
|
|
|
mGpuSolverCore->preIntegration(offset, mSolverBodyPool.size(), mDt, mGravity);
|
|
|
|
if(mIsTGS)
|
|
mIslandContextPool->mBiasCoefficient = PxMin(0.9f, 2.0f * PxSqrt(1.0f / mIslandContextPool->mNumPositionIterations));
|
|
}
|
|
|
|
void PxgGpuContext::doArticulationGPU()
|
|
{
|
|
if(mIsTGS)
|
|
{
|
|
mGpuArticulationCore->computeUnconstrainedVelocities(mArticulationStartIndex, mArticulationCount, mDt, mGravity, 1.0f/mLengthScale, mIsExternalForcesEveryTgsIterationEnabled, mRecomputeArticulationBlockFormat);
|
|
}
|
|
else
|
|
{
|
|
mGpuArticulationCore->computeUnconstrainedVelocities(mArticulationStartIndex, mArticulationCount, mDt, mGravity, 1.0f/mLengthScale, false, mRecomputeArticulationBlockFormat);
|
|
mGpuArticulationCore->setupInternalConstraints(mArticulationCount, mDt, mDt, 1.0f / mDt, false);
|
|
}
|
|
}
|
|
|
|
void PxgGpuContext::doSoftbodyGPU()
|
|
{
|
|
PxgSoftBodyCore* softBodyCore = static_cast<PxgSimulationController*>(mSimulationController)->getSoftBodyCore();
|
|
if(softBodyCore)
|
|
softBodyCore->updateTetraRotations();
|
|
}
|
|
|
|
void PxgGpuContext::doFEMClothGPU()
|
|
{
|
|
// "I quickly checked, and it currently only resets Lagrange multiplier lambda used in the PBD framework.
|
|
// For TGS, we don't use the Lagrange multiplier so no need to reset. Calling it on PGS only sounds okay to me."
|
|
if(!mIsTGS)
|
|
{
|
|
PxgFEMClothCore* femClothCore = static_cast<PxgSimulationController*>(mSimulationController)->getFEMClothCore();
|
|
if(femClothCore)
|
|
femClothCore->preIteration();
|
|
}
|
|
}
|
|
|
|
void PxgGpuContext::doConstraintPrePrepGPU()
|
|
{
|
|
if(mIsTGS)
|
|
{
|
|
//Kick off articulation internal constraint setup code. At this point, we know the iteration count so we
|
|
//know how large time-steps will be.
|
|
const PxReal stepDt = mDt / PxReal(mIslandContextPool->mNumPositionIterations);
|
|
|
|
mGpuArticulationCore->setupInternalConstraints(mArticulationCount, stepDt, mDt, 1.0f / stepDt, true);
|
|
}
|
|
|
|
doConstraintPrePrepGPUCommon(mHasForceThresholds);
|
|
}
|
|
|
|
void PxgPostSolveTask::runInternal()
|
|
{
|
|
mContext.doPostSolveTask(mCont);
|
|
}
|
|
|
|
//This class kicks off constraint solve on GPU
|
|
void PxgGpuTask::runInternal()
|
|
{
|
|
mContext.mGpuSolverCore->acquireContext();
|
|
|
|
mContext.doConstraintJointBlockPrePrepGPU();
|
|
|
|
mContext.doConstraintPrepGPU();
|
|
mContext.doConstraintSolveGPU(mMaxNodes, *mChangedHandleMap);
|
|
|
|
mContext.mGpuSolverCore->releaseContext();
|
|
}
|
|
|
|
void PxgGpuIntegrationTask::runInternal()
|
|
{
|
|
mContext.mGpuSolverCore->acquireContext();
|
|
|
|
//for articulation
|
|
mContext.doArticulationGPU();
|
|
|
|
//for soft body update rotation
|
|
mContext.doSoftbodyGPU();
|
|
|
|
//for FEM-cloth
|
|
mContext.doFEMClothGPU();
|
|
|
|
mContext.mGpuSolverCore->releaseContext();
|
|
}
|
|
|
|
void PxgGpuPrePrepTask::runInternal()
|
|
{
|
|
mContext.mGpuSolverCore->acquireContext();
|
|
|
|
mContext.doPreIntegrationGPU();
|
|
|
|
//for d6 joint
|
|
mContext.doConstraintPrePrepGPU();
|
|
|
|
PxgJointManager& jointManager = mContext.getSimulationController()->getJointManager();
|
|
jointManager.reserveMemory(Dy::MAX_CONSTRAINT_ROWS);
|
|
|
|
mContext.mGpuSolverCore->releaseContext();
|
|
|
|
mContext.cpuJointPrePrepTask(mCont);
|
|
}
|
|
|
|
// Chains the post-solve task in front of 'continuation' and then removes one reference from it.
// NOTE(review): removing the reference presumably allows the task to be scheduled - confirm against
// the task system.
void PxgGpuContext::updateBodyCore(PxBaseTask* continuation)
{
	mPostSolveTask.setContinuation(continuation);
	mPostSolveTask.removeReference();
}
|
|
|
|
//#define PXG_INCREMENTAL_SANITY_CHECKS
#if PX_ENABLE_ASSERTS
// Assert-only helper. Returns true when no two of the first 'size' elements of 'buffer' compare
// equal. The pairwise scan is quadratic, so it is only compiled in when
// PXG_INCREMENTAL_SANITY_CHECKS is defined; otherwise the function always reports "no duplicates".
template <typename T>
static bool noDuplicates(T* buffer, const PxU32 size)
{
#ifdef PXG_INCREMENTAL_SANITY_CHECKS
	// Compare each element against every element that precedes it.
	for (PxU32 current = 1; current < size; ++current)
	{
		for (PxU32 earlier = 0; earlier < current; ++earlier)
		{
			if (buffer[current] == buffer[earlier])
				return false;
		}
	}
	return true;
#else
	PX_UNUSED(buffer);
	PX_UNUSED(size);
	return true;
#endif
}
#endif
|
|
|
|
// Returns true when there is anything for the solver to do this frame: at least one active rigid
// body or articulation (counts supplied by the caller), or at least one active particle system,
// deformable surface or deformable volume node in the island sim.
static PX_FORCE_INLINE bool needsSolve(IG::IslandSim& islandSim, PxU32 bodyCount, PxU32 articulationCount)
{
	const PxU32 nbParticleSystems = islandSim.getNbActiveNodes(IG::Node::ePARTICLESYSTEM_TYPE);
	const PxU32 nbDeformableSurfaces = islandSim.getNbActiveNodes(IG::Node::eDEFORMABLE_SURFACE_TYPE);
	const PxU32 nbDeformableVolumes = islandSim.getNbActiveNodes(IG::Node::eDEFORMABLE_VOLUME_TYPE);

	const bool hasRigidWork = (bodyCount != 0) || (articulationCount != 0);
	const bool hasParticleOrDeformableWork =
		(nbParticleSystems != 0) || (nbDeformableVolumes != 0) || (nbDeformableSurfaces != 0);

	return hasRigidWork || hasParticleOrDeformableWork;
}
|
|
|
|
void PxgGpuContext::update( Cm::FlushPool& flushPool, PxBaseTask* continuation, PxBaseTask* postPartitioningTask, PxBaseTask* /*lostTouchTask*/,
|
|
PxvNphaseImplementationContext* nphase, PxU32 /*maxPatchesPerCM*/, PxU32 /*maxArticulationLinks*/, PxReal dt,
|
|
const PxVec3& gravity, PxBitMapPinned& /*changedHandleMap*/)
|
|
{
|
|
mGpuSolverCore->acquireContext();
|
|
|
|
PxsContactManagerOutputIterator iterator = nphase->getContactManagerOutputs();
|
|
PxsContactManagerOutput* gpuContactManagerOutputs = nphase->getGPUContactManagerOutputBase();
|
|
|
|
mGPURestDistances = nphase->getGPURestDistances();
|
|
mGPUShapeInteractions = nphase->getGPUShapeInteractions();
|
|
mGPUTorsionalData = nphase->getGPUTorsionalData();
|
|
|
|
mSolvedThisFrame = false;
|
|
mOutputIterator = iterator;
|
|
PX_ASSERT(noDuplicates(nphase->getLostFoundPatchManagers(), nphase->getNbLostFoundPatchManagers()));
|
|
//First and foremost, we need to get a set of islands (bodies, constraints etc.)
|
|
//These will be parameters
|
|
IG::IslandSim& islandSim = mIslandManager.getAccurateIslandSim();
|
|
|
|
const PxU32 bodyCount = islandSim.getNbActiveNodes(IG::Node::eRIGID_BODY_TYPE);
|
|
const PxU32 articulationCount = islandSim.getNbActiveNodes(IG::Node::eARTICULATION_TYPE);
|
|
|
|
mGpuSolverCore->setGpuContactManagerOutputBase(gpuContactManagerOutputs);
|
|
|
|
if(!mIsTGS)
|
|
mGpuSolverCore->syncSimulationController(); // PT: for some reason it's located here in PGS
|
|
|
|
const PxU32 kinematicCount = islandSim.getNbActiveKinematics();
|
|
mKinematicCount = kinematicCount;
|
|
|
|
mArticulationCount = articulationCount;
|
|
mArticulationStartIndex = 1 + kinematicCount + bodyCount;
|
|
mRecomputeArticulationBlockFormat = getSimulationController()->getRecomputeArticulationBlockFormat();
|
|
|
|
mBodyCount = bodyCount;
|
|
|
|
mPinnedMemoryAllocator->reset();
|
|
|
|
#if PX_ENABLE_SIM_STATS
|
|
mSimStats.mNbActiveKinematicBodies = islandSim.getNbActiveKinematics();
|
|
mSimStats.mNbActiveDynamicBodies = islandSim.getNbActiveNodes(IG::Node::eRIGID_BODY_TYPE);
|
|
mSimStats.mNbActiveConstraints = islandSim.getNbActiveEdges(IG::Edge::eCONSTRAINT);
|
|
mSimStats.mNbPartitions = mIncrementalPartition.getNbPartitions();
|
|
#else
|
|
PX_CATCH_UNDEFINED_ENABLE_SIM_STATS
|
|
#endif
|
|
//mConstraintWriteBackStreamAllocator->reserve(sizeof(Dy::ConstraintWriteback) * nbConstraints);
|
|
|
|
mConstraintsPerPartition.forceSize_Unsafe(0);
|
|
mDt = dt;
|
|
mInvDt = 1.f / dt;
|
|
mGravity = gravity;
|
|
//mEnableStabilization = enableStabilization;
|
|
|
|
if(mIsTGS)
|
|
mGpuSolverCore->syncSimulationController();
|
|
|
|
{
|
|
PX_PROFILE_ZONE("Dynamics.allocateBodyBuffers", 0);
|
|
|
|
const PxU32 maxLinks = getSimulationController()->getSimulationCore()->getMaxArticulationLinks();
|
|
const PxU32 maxDofs = getSimulationController()->getSimulationCore()->getMaxArticulationDofs();
|
|
|
|
const PxU32 totalLinkJointRootStateByteSize =
|
|
PxgArticulationLinkJointRootStateData::computeStateDataBufferByteSizeAligned16(maxLinks, maxDofs, articulationCount);
|
|
|
|
if (totalLinkJointRootStateByteSize > mLinkAndJointAndRootStateDataPool.capacity())
|
|
{
|
|
mLinkAndJointAndRootStateDataPool.forceSize_Unsafe(0);
|
|
mLinkAndJointAndRootStateDataPool.reserve(totalLinkJointRootStateByteSize);
|
|
}
|
|
|
|
if (articulationCount > mArticulationSleepDataPool.capacity())
|
|
{
|
|
mArticulationSleepDataPool.forceSize_Unsafe(0);
|
|
mArticulationSleepDataPool.reserve(articulationCount);
|
|
}
|
|
|
|
if (articulationCount*2 > mInternalResidualPerArticulationVelIter.capacity())
|
|
{
|
|
mInternalResidualPerArticulationVelIter.forceSize_Unsafe(0);
|
|
mInternalResidualPerArticulationVelIter.reserve(articulationCount*2);
|
|
}
|
|
if (articulationCount*2 > mInternalResidualPerArticulationPosIter.capacity())
|
|
{
|
|
mInternalResidualPerArticulationPosIter.forceSize_Unsafe(0);
|
|
mInternalResidualPerArticulationPosIter.reserve(articulationCount*2);
|
|
}
|
|
|
|
mLinkAndJointAndRootStateDataPool.forceSize_Unsafe(totalLinkJointRootStateByteSize);
|
|
mArticulationSleepDataPool.forceSize_Unsafe(articulationCount);
|
|
mInternalResidualPerArticulationVelIter.forceSize_Unsafe(articulationCount * 2);
|
|
mInternalResidualPerArticulationPosIter.forceSize_Unsafe(articulationCount * 2);
|
|
|
|
//1: Allocate buffers for all bodies (kinematic + dynamic)
|
|
if ((kinematicCount + bodyCount + 1) > mSolverBodyPool.capacity())
|
|
{
|
|
//we don't need to dma up/back dynamic solver body data to gpu anymore. However, we still need to dma up static/kinematic solver body
|
|
const PxU32 totalBodyAlignedCounts = (kinematicCount + bodyCount + 31 + 1) & (~31);
|
|
|
|
mSolverBodyPool.forceSize_Unsafe(0);
|
|
mSolverBodyPool.reserve(totalBodyAlignedCounts);
|
|
|
|
mBody2WorldPool.forceSize_Unsafe(0);
|
|
mBody2WorldPool.reserve(totalBodyAlignedCounts);
|
|
|
|
mSolverBodyDataPool.forceSize_Unsafe(0);
|
|
|
|
mSolverBodySleepDataPool.forceSize_Unsafe(0);
|
|
mSolverBodySleepDataPool.reserve(totalBodyAlignedCounts);
|
|
|
|
mSolverTxIDataPool.forceSize_Unsafe(0);
|
|
mSolverTxIDataPool.reserve(totalBodyAlignedCounts);
|
|
}
|
|
|
|
if ((kinematicCount + bodyCount + 1 + articulationCount) > mActiveNodeIndex.capacity())
|
|
{
|
|
const PxU32 totalArticulationAlignedCounts = (kinematicCount + bodyCount + 1 + articulationCount + 31) & (~31);
|
|
|
|
mActiveNodeIndex.forceSize_Unsafe(0);
|
|
mActiveNodeIndex.reserve(totalArticulationAlignedCounts);
|
|
}
|
|
|
|
if ((kinematicCount + 31 + 1) > mSolverBodyDataPool.capacity())
|
|
{
|
|
mSolverBodyDataPool.reserve((kinematicCount + 31 + 1) & (~31));
|
|
}
|
|
|
|
mActiveNodeIndex.forceSize_Unsafe(1 + kinematicCount + bodyCount + articulationCount);
|
|
|
|
//Set up constraint batches
|
|
const PxU32 totalBodySize = 1 + kinematicCount + bodyCount;
|
|
mSolverBodyPool.forceSize_Unsafe(totalBodySize);
|
|
|
|
mBody2WorldPool.forceSize_Unsafe(totalBodySize);
|
|
//we don't need to create dynamic solver body data in cpu anymore
|
|
mSolverBodyDataPool.forceSize_Unsafe(1 + kinematicCount);
|
|
//we need to dma up static+kinematic part of the sleepData and we dma up the whole sleepData array
|
|
mSolverBodySleepDataPool.forceSize_Unsafe(totalBodySize);
|
|
mSolverTxIDataPool.forceSize_Unsafe(totalBodySize);
|
|
}
|
|
|
|
if (getEnableDirectGPUAPI())
|
|
{
|
|
getSimulationController()->getJointManager().reserveMemoryPreAddRemove();
|
|
}
|
|
|
|
if (needsSolve(islandSim, bodyCount, articulationCount))
|
|
{
|
|
//Set up gpu workloads early!!!
|
|
const PxNodeIndex* const PX_RESTRICT nodeIndices = islandSim.getActiveNodes(IG::Node::eRIGID_BODY_TYPE);
|
|
const PxNodeIndex* const PX_RESTRICT articulationNodeIndices = islandSim.getActiveNodes(IG::Node::eARTICULATION_TYPE);
|
|
|
|
PxMemCopy(mActiveNodeIndex.begin() + 1, islandSim.getActiveKinematics(), islandSim.getNbActiveKinematics() * sizeof(PxNodeIndex));
|
|
PxMemCopy(mActiveNodeIndex.begin() + 1 + kinematicCount, nodeIndices, sizeof(PxNodeIndex) * mBodyCount);
|
|
PxMemCopy(mActiveNodeIndex.begin() + mArticulationStartIndex, articulationNodeIndices, sizeof(PxNodeIndex) * mArticulationCount);
|
|
|
|
mActiveNodeIndex[0] = PxNodeIndex();
|
|
|
|
PxgSimulationController* controller = static_cast<PxgSimulationController*>(mSimulationController);
|
|
const PxU32 maxLinks = controller->getMaxLinks();
|
|
//DMA up the body data right now and any other data that might be available
|
|
mGpuSolverCore->allocateSolverBodyBuffers(mIslandManager.getNbNodeHandles() + 1, mActiveNodeIndex, mArticulationCount, maxLinks);
|
|
|
|
mSolvedThisFrame = true;
|
|
|
|
//solver task chain!
|
|
//Note - *all* work for *all* islands is processed in phases using a wide-model approach.
|
|
//This is friendlier for the GPU but can be more wasteful in terms of memory
|
|
mGpuTask.setContinuation(continuation);
|
|
mGpuPrePrepTask.setContinuation(&mGpuTask);
|
|
mPrepTask.setContinuation(&mGpuPrePrepTask);
|
|
mPreIntegrationTask.setContinuation(&mPrepTask);
|
|
mGpuIntegrationTask.setContinuation(&mGpuPrePrepTask);
|
|
|
|
//Set up world rigid body
|
|
mSolverBodyPool[0] = mWorldSolverBody;
|
|
mSolverBodyDataPool[0] = mWorldSolverBodyData;
|
|
mSolverTxIDataPool[0] = mWorldTxIData;
|
|
mSolverBodySleepDataPool[0] = mWorldSolverBodySleepData;
|
|
|
|
// these two are being launched immediately.
|
|
mGpuIntegrationTask.removeReference();
|
|
mPreIntegrationTask.removeReference();
|
|
}
|
|
|
|
// PT: when updateIncrementalIslands() is single-threaded this is a blocking call and we can use the
|
|
// partitioning data when it returns. This is not the case anymore with multi-threaded implementations.
|
|
|
|
// doConstraintPrePrepCommon() consumes the output of the incremental island building as part of mPrepTask
|
|
mIncrementalPartition.updateIncrementalIslands(
|
|
mIslandManager.getAccurateIslandSim(),
|
|
mIslandManager.getAuxCpuData(),
|
|
&flushPool, postPartitioningTask,
|
|
mOutputIterator, // PT: don't pass the local variable, it will go out of scope while the partitioning tasks are using it
|
|
getSimulationController()->getBodySimManager(),
|
|
getSimulationController()->getJointManager());
|
|
|
|
// PT: all the code after the updateIncrementalIslands() call has been moved to PxgGpuContext::updatePostPartitioning() where
|
|
// it can safely be executed after the potential updateIncrementalIslands() tasks are completed.
|
|
|
|
mGpuSolverCore->releaseContext();
|
|
}
|
|
|
|
// Second half of the per-frame GPU solver update; consumes the output of the incremental partitioning.
//
// In order, this:
//  - sets up the single island context that covers all bodies and articulations,
//  - syncs joint data and derives the contact manager / 1D constraint counts,
//  - returns early (releasing the GPU context) when needsSolve() reports no work,
//  - otherwise allocates the friction streams, snapshots island data into the staging buffers,
//    uploads contact/partition data through the solver core and removes the references on the
//    remaining solver tasks (mGpuTask, mGpuPrePrepTask, mPrepTask).
//
// lostTouchTask: gets an extra reference and is stored in mLostTouchTask.
// maxPatchesPerCM: scales the friction patch index stream allocation.
// changedHandleMap: handed to mGpuTask.
// The unnamed parameters are not used by this function.
void PxgGpuContext::updatePostPartitioning(PxBaseTask* lostTouchTask, PxvNphaseImplementationContext* /*nphase*/,
	PxU32 maxPatchesPerCM, PxU32 /*maxArticulationLinks*/,
	PxReal /*dt*/, const PxVec3& /*gravity*/, PxBitMapPinned& changedHandleMap)
{
	mGpuSolverCore->acquireContext();

	IG::IslandSim& islandSim = mIslandManager.getAccurateIslandSim();

	// Partitioning results.
	const PxPinnedArray<PartitionIndexData>& partitionIndices = mIncrementalPartition.getPartitionIndexArray();
	const PxPinnedArray<PartitionNodeData>& partitionNodes = mIncrementalPartition.getPartitionNodeArray();
	const PxPinnedArray<PxgSolverConstraintManagerConstants>& solverConstants = mIncrementalPartition.getSolverConstants();
	const PxInt32ArrayPinned& startSlabPerPartition = mIncrementalPartition.getStartSlabPerPartition();
	const PxInt32ArrayPinned& articStartSlabPerPartition = mIncrementalPartition.getArticStartSlabPerPartition();
	const PxInt32ArrayPinned& nbJointsPerPartition = mIncrementalPartition.getNbJointsPerPartition();
	const PxInt32ArrayPinned& nbArticJointsPerPartition = mIncrementalPartition.getNbArticJointsPerPartition();

	const PxArray<PxU32>& npIndices = mIncrementalPartition.getNpIndexArray();

	// Staging buffers filled further down.
	PxInt32ArrayPinned& npIndexStaging = mNodeIndicesStagingBuffer;
	PxInt32ArrayPinned& islandIds = mIslandIds;
	PxInt32ArrayPinned& islandStaticTouchCounts = mIslandStaticTouchCounts;

	const PxU32 nbConstraints = islandSim.getNbActiveEdges(IG::Edge::eCONSTRAINT);

	// At this point we are ready to allocate the pinned memory for the solver.
	allocateTempPinnedSolverMemoryCommon();

	const PxU32 bodyCount = mBodyCount;
	const PxU32 kinematicCount = mKinematicCount;
	const PxU32 articulationCount = mArticulationCount;

	//Force all bodies into a single island. The GPU partitioning provides better work balancing between blocks than just using multiple islands.
	PxgIslandContext& islandContext = mIslandContextPool[0];
	islandContext.mBodyStartIndex = 1 + kinematicCount;
	islandContext.mBodyCount = bodyCount;
	islandContext.mArticulationCount = articulationCount;
	islandContext.mNumPositionIterations = islandContext.mNumVelocityIterations = 0;
	mNumIslandContextPool = 1;

	//because updateIncrementalIslands add/remove joints based on activation
	getSimulationController()->updateJointsAndSyncData();

	//reset number of frozen/unfrozen shapes to be zero
	mSimulationController->clear();

	PxgJointManager& jointManager = getSimulationController()->getJointManager();
	PX_ASSERT((jointManager.getCpuNbRigidConstraints() + jointManager.getCpuNbArtiConstraints() +
		jointManager.getGpuNbActiveRigidConstraints() + jointManager.getGpuNbActiveArtiConstraints()) == nbConstraints);

	PX_UNUSED(jointManager);

	const PxU32 nbPatches = mIncrementalPartition.getTotalContacts(); // PT: same as what mIncrementalPartition.updateIncrementalIslands() returned

#if PX_ENABLE_ASSERTS
	// Sanity check: contacts + constraints must match the accumulated per-partition counts plus the static/self joints.
	PxU32 accumulatedConstraints = mIncrementalPartition.getAccumulatedConstraintCount().size() == 0 ? 0 : mIncrementalPartition.getAccumulatedConstraintCount()[mIncrementalPartition.getAccumulatedConstraintCount().size() - 1];
	PxU32 accumulatedArtiConstraints = mIncrementalPartition.getAccumulatedArtiConstraintCount().size() == 0 ? 0 : mIncrementalPartition.getAccumulatedArtiConstraintCount()[mIncrementalPartition.getAccumulatedArtiConstraintCount().size() - 1];
	PX_ASSERT((nbPatches + islandSim.getNbActiveEdges(IG::Edge::eCONSTRAINT) + mIncrementalPartition.getTotalArticulationContacts()) == (accumulatedConstraints + accumulatedArtiConstraints + getSimulationController()->getBodySimManager().mTotalStaticArticJoints +
		getSimulationController()->getBodySimManager().mTotalSelfArticJoints + getSimulationController()->getBodySimManager().mTotalStaticRBJoints));
#endif

	{
		PX_PROFILE_ZONE("Dynamics.allocateConstraintBuffers", 0);

		//set the constraint batches number but we will do the actual memory allocation in doPartitionTask() method and free the excess amount in doConstraintPrePrepCommon(), so that
		//we can make sure mConstraintBatches is the last element allocated in the pinned memory allocator, therefore, we can shrink the excess memory safely
		//mNumConstraintBatches = sentinel->constraints + sentinel->contactManagers;

		PxgBodySimManager& bodySimManager = getSimulationController()->getBodySimManager();

		mNumContactManagers = nbPatches + bodySimManager.mTotalStaticRBContacts;
		mNum1DConstraints = nbConstraints + bodySimManager.mTotalStaticRBJoints;

		mThresholdStream->forceSize_Unsafe(0);
		mThresholdStream->reserve(PxNextPowerOfTwo(mNumContactManagers));

		mForceChangedThresholdStream->forceSize_Unsafe(0);
		mForceChangedThresholdStream->reserve(PxNextPowerOfTwo(mNumContactManagers));

		//Set up constraint batches
		//If there is no work to do then we can do nothing at all.

		// AD: this only works because we have the same if when setting up the task chain.
		// it's also in a somewhat weird place. We should analyze the dependencies, is all of the work we're doing up to here actually
		// required to happen even if we early-out here?
		if (!needsSolve(islandSim, bodyCount, articulationCount))
		{
			mGpuSolverCore->releaseContext();
			return;
		}

		//printf("NbarticBatches = %i, NbRigidBatches = %i\n", mIncrementalPartition.mNbArtiContactBatches, mIncrementalPartition.mNbContactBatches);
	}

	mGpuSolverCore->resetMemoryAllocator();

	// Track the edge handle count of this frame and of the previous one.
	PxU32 nbEdgeHandles = mIslandManager.getNbEdgeHandles();
	mTotalPreviousEdges = mTotalEdges;
	mTotalEdges = nbEdgeHandles;

	mGpuSolverCore->allocateFrictionPatchIndexStream(nbEdgeHandles * maxPatchesPerCM); //How many batches

	mGpuSolverCore->allocateFrictionCounts(nbEdgeHandles);

	// The single island owns every descriptor, so its range starts at 0.
	const PxU32 nbDescs = mIncrementalPartition.getTotalConstraints() + mIncrementalPartition.getTotalContacts();
	islandContext.mDescCount = nbDescs;
	islandContext.mDescStartIndex = 0;

	lostTouchTask->addReference();
	mLostTouchTask = lostTouchTask;

	// Size the staging buffers (drop the old size, ensure capacity, set the new size).
	const PxU32 nbNpIndices = npIndices.size();
	npIndexStaging.forceSize_Unsafe(0);
	npIndexStaging.reserve(nbNpIndices);
	npIndexStaging.forceSize_Unsafe(nbNpIndices);

	const PxU32 nbNodes = islandSim.getNbNodes();
	islandIds.forceSize_Unsafe(0);
	islandIds.reserve(nbNodes);
	islandIds.forceSize_Unsafe(nbNodes);

	const PxU32 nbIslands = islandSim.getNbIslands();
	islandStaticTouchCounts.forceSize_Unsafe(0);
	islandStaticTouchCounts.reserve(nbIslands);
	islandStaticTouchCounts.forceSize_Unsafe(nbIslands);

	//npIndexArray might be changed in island gen while solver is running, so we need to double buffer it
	PxMemCopy(npIndexStaging.begin(), npIndices.begin(), sizeof(PxU32) * nbNpIndices);
	PxMemCopy(islandIds.begin(), islandSim.getIslandIds(), sizeof(PxU32) * nbNodes);
	PxMemCopy(islandStaticTouchCounts.begin(), islandSim.getIslandStaticTouchCount(), sizeof(PxU32) * nbIslands);

	const PxInt32ArrayPinned& nodeInteractions = mIncrementalPartition.getNodeInteractionCountArray();

	// Upload contact streams, partition data and the staged island data.
	mGpuSolverCore->gpuMemDMAUpContactData(
		mContactStreamAllocators[mCurrentContactStream],
		PxToU32(mContactStreamPool.mSharedDataIndex),
		mContactStreamPool.mSharedDataIndexGPU,
		mPatchStreamAllocators[mCurrentContactStream],
		PxToU32(mPatchStreamPool.mSharedDataIndex),
		mPatchStreamPool.mSharedDataIndexGPU,
		mNumContactManagers,
		partitionIndices.begin(), partitionNodes.begin(), solverConstants.begin(), solverConstants.size(), partitionIndices.size(),
		startSlabPerPartition.begin(), articStartSlabPerPartition.begin(), nbJointsPerPartition.begin(), nbArticJointsPerPartition.begin(),
		startSlabPerPartition.size(),
		mIncrementalPartition.getDestroyedContactEdgeIndices().begin(), mIncrementalPartition.getDestroyedContactEdgeIndices().size(),
		npIndexStaging.begin(), npIndexStaging.size(),
		/*jointManager.mGpuJointData, jointManager.mGpuJointPrePrep, gpuJointSize,*/ mConstraintWriteBackPool.size(),
		islandIds.begin(), nodeInteractions.begin(), islandIds.size(), islandStaticTouchCounts.begin(), islandStaticTouchCounts.size());

	mGpuSolverCore->releaseContext();

	mGpuTask.setMaxNodesAndWordCounts(mIslandManager.getNbNodeHandles(), changedHandleMap);

	//Now we have kicked off all the atom integration and pre-prep work, so we can permit the remaining phases of the solver to run...
	//mPostSolveTask.removeReference();
	mGpuTask.removeReference();
	mGpuPrePrepTask.removeReference();
	mPrepTask.removeReference();
}
|
|
|
|
}
|