feat(physics): wire physx sdk into build

This commit is contained in:
2026-04-15 12:22:15 +08:00
parent 5bf258df6d
commit 31f40e2cbb
2044 changed files with 752623 additions and 1 deletions

View File

@@ -0,0 +1,449 @@
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
#ifndef __ARTI_DYNAMIC_CUH__
#define __ARTI_DYNAMIC_CUH__
#include "PxgArticulation.h"
#include "PxgArticulationLink.h"
#include "DyFeatherstoneArticulationUtils.h"
#include "DyFeatherstoneArticulation.h"
//This function stores Q-stZ to mDeferredQstZ
static __device__ Cm::UnAlignedSpatialVector propagateImpulseW_0(const PxVec3& childToParent,
PxgArticulationBlockDofData* PX_RESTRICT dofData, const Cm::UnAlignedSpatialVector& Z,
const PxU32 dofCount, const PxU32 threadIndexInWarp,
const PxReal* PX_RESTRICT jointForce = NULL, const PxReal jointForceMultiplier = 1.0f)
{
Cm::UnAlignedSpatialVector temp = Z;
Cm::UnAlignedSpatialVector sas[3];
Cm::UnAlignedSpatialVector isInvD[3];
PxReal jf[3];
// the split into two separate loops is an optimization that allows dispatching the loads as early as possible.
#pragma unroll 3
for (PxU32 ind = 0; ind < 3; ++ind)
{
if (ind < dofCount)
{
sas[ind] = loadSpatialVector(dofData[ind].mWorldMotionMatrix, threadIndexInWarp);
isInvD[ind] = loadSpatialVector(dofData[ind].mIsInvDW, threadIndexInWarp);
jf[ind] = (jointForce ? jointForce[ind] * jointForceMultiplier : 0.0f);
}
}
#pragma unroll 3
for (PxU32 ind = 0; ind < 3; ++ind)
{
if (ind < dofCount)
{
const PxReal stZ = jf[ind] - sas[ind].innerProduct(Z);
dofData[ind].mDeferredQstZ[threadIndexInWarp] += stZ;
temp += isInvD[ind] * stZ;
}
}
//parent space's spatial zero acceleration impulse
return Dy::FeatherstoneArticulation::translateSpatialVector(childToParent, temp);
}
static __device__ Cm::UnAlignedSpatialVector propagateImpulseWTemp(const PxVec3& childToParent,
PxgArticulationBlockDofData* PX_RESTRICT dofData, const Cm::UnAlignedSpatialVector& Z,
const PxU32 dofCount, const PxU32 threadIndexInWarp)
{
Cm::UnAlignedSpatialVector temp = Z;
assert(dofCount<=3);
for (PxU32 ind = 0; ind < 3; ++ind)
{
if(ind<dofCount)
{
const Cm::UnAlignedSpatialVector sa = loadSpatialVector(dofData[ind].mWorldMotionMatrix, threadIndexInWarp);
const Cm::UnAlignedSpatialVector isInvD = loadSpatialVector(dofData[ind].mIsInvDW, threadIndexInWarp);
const PxReal stZ = -sa.innerProduct(Z);
dofData[ind].mTmpQstZ[threadIndexInWarp] += stZ;
temp += isInvD * stZ;
}
}
//parent space's spatial zero acceleration impulse
return Dy::FeatherstoneArticulation::translateSpatialVector(childToParent, temp);
}
static __device__ Cm::UnAlignedSpatialVector propagateImpulseW_1(
const PxVec3& childToParent,
const PxgArticulationBlockDofData* PX_RESTRICT dofData,
const Cm::UnAlignedSpatialVector& Z,
const PxReal* jointDofImpulses, const PxU32 dofCount,
const PxU32 threadIndexInWarp,
PxReal* qstZ)
{
Cm::UnAlignedSpatialVector temp = Z;
assert(dofCount<=3);
for (PxU32 ind = 0; ind < 3; ++ind)
{
if(ind<dofCount)
{
const Cm::UnAlignedSpatialVector sa = loadSpatialVector(dofData[ind].mWorldMotionMatrix, threadIndexInWarp);
const Cm::UnAlignedSpatialVector isInvD = loadSpatialVector(dofData[ind].mIsInvDW, threadIndexInWarp);
const PxReal jointDofImpulse = jointDofImpulses ? jointDofImpulses[ind] : 0.0f;
const PxReal QMinusSTZ = jointDofImpulse - sa.innerProduct(Z);
qstZ[ind] += QMinusSTZ;
temp += isInvD * QMinusSTZ;
}
}
//parent space's spatial zero acceleration impulse
return Dy::FeatherstoneArticulation::translateSpatialVector(childToParent, temp);
}
static __device__ Cm::UnAlignedSpatialVector propagateAccelerationW(const PxVec3& c2p, const float3* invStIsT,
const Cm::UnAlignedSpatialVector* motionMatrix, const Cm::UnAlignedSpatialVector& hDeltaV, const PxU32 dofCount,
const Cm::UnAlignedSpatialVector* IsW, const PxReal* qstZ)
{
Cm::UnAlignedSpatialVector pDeltaV = Dy::FeatherstoneArticulation::translateSpatialVector(-c2p, hDeltaV); //parent velocity change
//Convert parent velocity change into an impulse
PxReal tJointDelta[3] = { 0.f, 0.f, 0.f };
#pragma unroll 3
for (PxU32 ind = 0; ind < 3; ++ind)
{
if (ind < dofCount)
{
//stI * pAcceleration
const PxReal temp = IsW[ind].innerProduct(pDeltaV);
tJointDelta[ind] = (qstZ[ind] - temp);
}
}
assert(dofCount<=3);
for (PxU32 ind = 0; ind < 3; ++ind)
{
if(ind<dofCount)
{
const float3 iStIsTi = invStIsT[ind];
const PxReal jDelta = iStIsTi.x * tJointDelta[0]
+ iStIsTi.y * tJointDelta[1]
+ iStIsTi.z * tJointDelta[2];
pDeltaV += motionMatrix[ind] * jDelta;
}
}
return pDeltaV;
}
static __device__ Cm::UnAlignedSpatialVector computeSpatialJointDelta(
const PxgArticulationBlockDofData* PX_RESTRICT dofData,
const PxReal* PX_RESTRICT QSTZMinusISDotTranslatedParentDeltaV, PxReal* PX_RESTRICT jointDeltaDofSpeeds, const PxU32 dofCount,
const PxU32 threadIndexInWarp)
{
Cm::UnAlignedSpatialVector sas[3];
// the split into two separate loops is an optimization that allows dispatching the loads as early as possible.
#pragma unroll 3
for (PxU32 ind = 0; ind < 3; ++ind)
{
if (ind < dofCount)
{
sas[ind] = loadSpatialVector(dofData[ind].mWorldMotionMatrix, threadIndexInWarp);
}
}
Cm::UnAlignedSpatialVector jointSpatialDeltaV(PxVec3(0.f), PxVec3(0.f));
#pragma unroll 3
for (PxU32 ind = 0; ind < 3; ++ind)
{
if (ind < dofCount)
{
const float iStIsTi_x = dofData[ind].mInvStIsT_x[threadIndexInWarp];
const float iStIsTi_y = dofData[ind].mInvStIsT_y[threadIndexInWarp];
const float iStIsTi_z = dofData[ind].mInvStIsT_z[threadIndexInWarp];
const PxReal jDelta = iStIsTi_x * QSTZMinusISDotTranslatedParentDeltaV[0]
+ iStIsTi_y * QSTZMinusISDotTranslatedParentDeltaV[1]
+ iStIsTi_z * QSTZMinusISDotTranslatedParentDeltaV[2];
if(jointDeltaDofSpeeds)
jointDeltaDofSpeeds[ind] = jDelta;
jointSpatialDeltaV += sas[ind] * jDelta;
}
}
return jointSpatialDeltaV;
}
//This function use mDeferredQstZ
static __device__ Cm::UnAlignedSpatialVector propagateAccelerationW(const PxVec3& c2p,
const PxgArticulationBlockDofData* PX_RESTRICT dofData,
const Cm::UnAlignedSpatialVector& hDeltaV,
const PxU32 dofCount, PxReal* jointDeltaDofSpeeds, const PxU32 threadIndexInWarp)
{
const Cm::UnAlignedSpatialVector pDeltaV = Dy::FeatherstoneArticulation::translateSpatialVector(-c2p, hDeltaV); //parent velocity change
//[(Q - S^T *Z)] - [(I*S).innerProduct(translated(parentDeltaV))]
PxReal QSTZMinusISDotTranslatedParentDeltaV[3] = { 0.f, 0.f, 0.f };
Cm::UnAlignedSpatialVector IsW;
#pragma unroll(3)
for (PxU32 ind = 0; ind < 3; ++ind)
{
if (ind < dofCount)
{
IsW = loadSpatialVector(dofData[ind].mIsW, threadIndexInWarp);
//stI * pAcceleration
const PxReal temp = IsW.innerProduct(pDeltaV);
QSTZMinusISDotTranslatedParentDeltaV[ind] = (dofData[ind].mDeferredQstZ[threadIndexInWarp] - temp);
}
}
const Cm::UnAlignedSpatialVector jointSpatialDeltaV = computeSpatialJointDelta(dofData, QSTZMinusISDotTranslatedParentDeltaV, jointDeltaDofSpeeds, dofCount, threadIndexInWarp);
return pDeltaV + jointSpatialDeltaV;
}
//This function use mTmpQstZ
static __device__ Cm::UnAlignedSpatialVector propagateAccelerationWTemp(const PxVec3& c2p,
const PxgArticulationBlockDofData* PX_RESTRICT dofData,
const Cm::UnAlignedSpatialVector& hDeltaV,
const PxU32 dofCount, const PxU32 threadIndexInWarp)
{
const Cm::UnAlignedSpatialVector pDeltaV = Dy::FeatherstoneArticulation::translateSpatialVector(-c2p, hDeltaV); //parent velocity change
//[(Q - S^T *Z)] - [(I*S).innerProduct(translated(parentDeltaV))]
PxReal QSTZMinusISDotTransaltedParentDeltaV[3] = { 0.f, 0.f, 0.f };
#pragma unroll(3)
for (PxU32 ind = 0; ind < 3; ++ind)
{
if (ind < dofCount)
{
const Cm::UnAlignedSpatialVector IsW = loadSpatialVector(dofData[ind].mIsW, threadIndexInWarp);
//stI * pAcceleration
const PxReal temp = IsW.innerProduct(pDeltaV);
QSTZMinusISDotTransaltedParentDeltaV[ind] = (dofData[ind].mTmpQstZ[threadIndexInWarp] - temp);
}
}
const Cm::UnAlignedSpatialVector jointSpatialDeltaV = computeSpatialJointDelta(dofData, QSTZMinusISDotTransaltedParentDeltaV, NULL, dofCount, threadIndexInWarp);
return pDeltaV + jointSpatialDeltaV;
}
//This function use qstZ as input
static __device__ Cm::UnAlignedSpatialVector propagateAccelerationW(const PxVec3& c2p,
const PxgArticulationBlockDofData* PX_RESTRICT dofData,
const Cm::UnAlignedSpatialVector& hDeltaV,
const PxU32 dofCount, const PxU32 threadIndexInWarp,
const PxReal* qstZ)
{
const Cm::UnAlignedSpatialVector pDeltaV = Dy::FeatherstoneArticulation::translateSpatialVector(-c2p, hDeltaV); //parent velocity change
//[(Q - S^T *Z)] - [(I*S).innerProduct(translated(parentDeltaV))]
PxReal QSTZMinusISDotTransaltedParentDeltaV[3] = { 0.f, 0.f, 0.f };
Cm::UnAlignedSpatialVector IsW;
#pragma unroll(3)
for (PxU32 ind = 0; ind < 3; ++ind)
{
if (ind < dofCount)
{
IsW = loadSpatialVector(dofData[ind].mIsW, threadIndexInWarp);
//stI * pAcceleration
const PxReal temp = IsW.innerProduct(pDeltaV);
QSTZMinusISDotTransaltedParentDeltaV[ind] = (qstZ[ind] - temp);
}
}
const Cm::UnAlignedSpatialVector jointSpatialDeltaV = computeSpatialJointDelta(dofData, QSTZMinusISDotTransaltedParentDeltaV, NULL, dofCount, threadIndexInWarp);
return pDeltaV + jointSpatialDeltaV;
}
static __device__ Cm::UnAlignedSpatialVector propagateAccelerationW(
PxgArticulationBlockLinkData& linkData,
PxgArticulationBlockDofData* dofData,
const PxU32 dofCount,
const Cm::UnAlignedSpatialVector& hDeltaV,
const PxU32 threadIndexInWarp)
{
const float c2px = linkData.mRw_x[threadIndexInWarp];
const float c2py = linkData.mRw_y[threadIndexInWarp];
const float c2pz = linkData.mRw_z[threadIndexInWarp];
float3 invStIsT[3];
PxReal tJointDelta[3] = { 0.f, 0.f, 0.f };
Cm::UnAlignedSpatialVector isW[3];
Cm::UnAlignedSpatialVector mMotionMatrix[3];
PxReal jVel[3];
// the split into three separate loops is an optimization that allows dispatching the loads as early as possible.
#pragma unroll 3
for (PxU32 ind = 0; ind < 3; ++ind)
{
if (ind < dofCount)
{
isW[ind] = loadSpatialVector(dofData[ind].mIsW, threadIndexInWarp);
tJointDelta[ind] = (dofData[ind].mDeferredQstZ[threadIndexInWarp]);
jVel[ind] = dofData[ind].mJointVelocities[threadIndexInWarp];
invStIsT[ind] = make_float3(dofData[ind].mInvStIsT_x[threadIndexInWarp], dofData[ind].mInvStIsT_y[threadIndexInWarp], dofData[ind].mInvStIsT_z[threadIndexInWarp]);
mMotionMatrix[ind] = loadSpatialVector(dofData[ind].mWorldMotionMatrix, threadIndexInWarp);
}
}
Cm::UnAlignedSpatialVector pDeltaV = Dy::FeatherstoneArticulation::translateSpatialVector(PxVec3(-c2px, -c2py, -c2pz), hDeltaV); //parent velocity change
#pragma unroll 3
for (PxU32 ind = 0; ind < 3; ++ind)
{
if (ind < dofCount)
{
tJointDelta[ind] -= isW[ind].innerProduct(pDeltaV);
}
}
#pragma unroll 3
for (PxU32 ind = 0; ind < 3; ++ind)
{
if (ind < dofCount)
{
const PxReal jDelta = invStIsT[ind].x * tJointDelta[0] + invStIsT[ind].y * tJointDelta[1]
+ invStIsT[ind].z * tJointDelta[2];
dofData[ind].mJointVelocities[threadIndexInWarp] = jVel[ind] + jDelta;
pDeltaV += mMotionMatrix[ind] * jDelta;
}
}
return pDeltaV;
}
// There is a another version of this function in forwardDynamic2.cu which additionally writes
// the link velocity to a global buffer
static void __device__ PxcFsFlushVelocity(PxgArticulationBlockData& articulation,
PxgArticulationBlockLinkData* PX_RESTRICT artiLinks,
PxgArticulationBlockDofData* PX_RESTRICT artiDofs,
PxU32 linkCount, bool fixBase, const PxU32 threadIndexInWarp)
{
Cm::UnAlignedSpatialVector deltaV = Cm::UnAlignedSpatialVector::Zero();
Cm::UnAlignedSpatialVector deferredZ = -loadSpatialVector(articulation.mRootDeferredZ, threadIndexInWarp);
if (!fixBase)
{
//ArticulationLink& link = links[0];
// PT: preload data
const Cm::UnAlignedSpatialVector motionVelocity0 = loadSpatialVector(artiLinks[0].mMotionVelocity, threadIndexInWarp);
const Cm::UnAlignedSpatialVector solverSpatialDeltaVel0 = loadSpatialVector(artiLinks[0].mSolverSpatialDeltaVel, threadIndexInWarp);
Dy::SpatialMatrix invInertia;
loadSpatialMatrix(articulation.mInvSpatialArticulatedInertia, threadIndexInWarp, invInertia);
//deltaV = invInertia * (-loadSpatialVector(artiLinks[0].mDeferredZ, threadIndexInWarp));
deltaV = invInertia * deferredZ;
//motionVelocities[0] += deltaV[0];
storeSpatialVector(artiLinks[0].mMotionVelocity, motionVelocity0 + deltaV, threadIndexInWarp);
//storeSpatialVector(artiLinks[0].mDeferredZ, Cm::UnAlignedSpatialVector::Zero(), threadIndexInWarp);
storeSpatialVector(articulation.mRootDeferredZ, Cm::UnAlignedSpatialVector::Zero(), threadIndexInWarp);
storeSpatialVector(artiLinks[0].mSolverSpatialDeltaVel, solverSpatialDeltaVel0 + deltaV, threadIndexInWarp);
}
storeSpatialVector(artiLinks[0].mScratchDeltaV, deltaV, threadIndexInWarp);
addSpatialVector(artiLinks[0].mConstraintForces, deferredZ, threadIndexInWarp);
storeSpatialVector(articulation.mCommonLinkDeltaVelocity, Cm::UnAlignedSpatialVector::Zero(), threadIndexInWarp);
PxgArticulationBlockDofData* dofs = artiDofs;
if (linkCount > 1)
{
Cm::UnAlignedSpatialVector nextMotionV = loadSpatialVector(artiLinks[1].mMotionVelocity, threadIndexInWarp);
PxU32 nextNbDofs = artiLinks[1].mDofs[threadIndexInWarp];
PxU32 nextParent = artiLinks[1].mParents[threadIndexInWarp];
//Cm::UnAlignedSpatialVector nextDeferredZ = loadSpatialVector(artiLinks[1].mDeferredZ, threadIndexInWarp);
for (PxU32 i = 1; i < linkCount; i++)
{
PxgArticulationBlockLinkData& tLink = artiLinks[i];
const PxU32 nbDofs = nextNbDofs;
const PxU32 parent = nextParent;
const Cm::UnAlignedSpatialVector preloadedConstraintForces = loadSpatialVector(tLink.mConstraintForces, threadIndexInWarp);
const Cm::UnAlignedSpatialVector preloadedSolverSpatialDeltaVel = loadSpatialVector(tLink.mSolverSpatialDeltaVel, threadIndexInWarp);
Cm::UnAlignedSpatialVector motionV = nextMotionV;
//const Cm::UnAlignedSpatialVector deferredZ = nextDeferredZ;
//storeSpatialVector(tLink.mDeferredZ, Cm::UnAlignedSpatialVector::Zero(), threadIndexInWarp);
if ((i + 1) < linkCount)
{
nextMotionV = loadSpatialVector(artiLinks[i + 1].mMotionVelocity, threadIndexInWarp);
nextNbDofs = artiLinks[i + 1].mDofs[threadIndexInWarp];
nextParent = artiLinks[i + 1].mParents[threadIndexInWarp];
//nextDeferredZ = loadSpatialVector(artiLinks[i + 1].mDeferredZ, threadIndexInWarp);
}
if (parent != (i - 1))
deltaV = loadSpatialVector(artiLinks[parent].mScratchDeltaV, threadIndexInWarp);
deltaV = propagateAccelerationW(tLink, dofs, nbDofs, deltaV, threadIndexInWarp);
//Accumulate the DeltaVel arising from solver impulses applied to this link.
storeSpatialVector(tLink.mSolverSpatialDeltaVel, preloadedSolverSpatialDeltaVel + deltaV, threadIndexInWarp);
//zeroing mDeferredQstZ
for (PxU32 ind = 0; ind < nbDofs; ++ind)
{
dofs[ind].mDeferredQstZ[threadIndexInWarp] = 0.f;
}
motionV += deltaV;
storeSpatialVector(tLink.mScratchDeltaV, deltaV, threadIndexInWarp);
//const PxTransform& tBody2World = poses[i];
storeSpatialVector(tLink.mMotionVelocity, motionV, threadIndexInWarp);
storeSpatialVector(tLink.mConstraintForces, preloadedConstraintForces + deltaV, threadIndexInWarp);
dofs += nbDofs;
}
}
}
#endif