feat(physics): wire physx sdk into build

2026-04-15 12:22:15 +08:00
parent 5bf258df6d
commit 31f40e2cbb
2044 changed files with 752623 additions and 1 deletions
--- a/engine/third_party/physx/source/lowleveldynamics/src/DyAllocator.cpp
+++ b/engine/third_party/physx/source/lowleveldynamics/src/DyAllocator.cpp
@@ -0,0 +1,57 @@
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+//  * Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//  * Redistributions in binary form must reproduce the above copyright
+//    notice, this list of conditions and the following disclaimer in the
+//    documentation and/or other materials provided with the distribution.
+//  * Neither the name of NVIDIA CORPORATION nor the names of its
+//    contributors may be used to endorse or promote products derived
+//    from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.  
+
+#include "DyAllocator.h"
+
+namespace physx
+{
+namespace Dy
+{
+// PT: the various error messages we used here look the same but they are slightly different. We could unify this really.
+
+const char* gConstraintDataErrorMsg_Null_Joints =
+	"Reached limit set by PxSceneDesc::maxNbContactDataBlocks - ran out of buffer space for constraint prep. "
+	"Either accept joints detaching/exploding or increase buffer size allocated for constraint prep by increasing PxSceneDesc::maxNbContactDataBlocks.";
+
+const char* gConstraintDataErrorMsg_TooLarge_Joints =
+	"Attempting to allocate more than 16K of constraint data. "
+	"Either accept joints detaching/exploding or simplify constraints.";
+
+const char* gConstraintDataErrorMsg_Null_Contacts =
+	"Reached limit set by PxSceneDesc::maxNbContactDataBlocks - ran out of buffer space for constraint prep. "
+	"Either accept dropped contacts or increase buffer size allocated for narrow phase by increasing PxSceneDesc::maxNbContactDataBlocks.";
+
+const char* gConstraintDataErrorMsg_TooLarge_Contacts =
+	"Attempting to allocate more than 16K of contact data for a single contact pair in constraint prep. "
+	"Either accept dropped contacts or simplify collision geometry.";
+
+const char* gFrictionDataErrorMsg_TooLarge =
+	"Attempting to allocate more than 16K of friction data for a single contact pair in constraint prep. "
+	"Either accept dropped contacts or simplify collision geometry.";
+}
+}
--- a/engine/third_party/physx/source/lowleveldynamics/src/DyAllocator.h
+++ b/engine/third_party/physx/source/lowleveldynamics/src/DyAllocator.h
@@ -0,0 +1,82 @@
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+//  * Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//  * Redistributions in binary form must reproduce the above copyright
+//    notice, this list of conditions and the following disclaimer in the
+//    documentation and/or other materials provided with the distribution.
+//  * Neither the name of NVIDIA CORPORATION nor the names of its
+//    contributors may be used to endorse or promote products derived
+//    from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.  
+   
+#ifndef DY_ALLOCATOR_H
+#define DY_ALLOCATOR_H
+
+#include "foundation/PxFoundation.h"
+
+namespace physx
+{
+namespace Dy
+{
+	extern const char* gConstraintDataErrorMsg_Null_Joints;
+	extern const char* gConstraintDataErrorMsg_TooLarge_Joints;
+	extern const char* gConstraintDataErrorMsg_Null_Contacts;
+	extern const char* gConstraintDataErrorMsg_TooLarge_Contacts;
+	extern const char* gFrictionDataErrorMsg_TooLarge;
+
+	template<const bool jointsErrorMsg>
+	PX_FORCE_INLINE static bool checkConstraintDataPtr(void* ptr)
+	{
+		if(NULL==ptr)
+		{
+			PX_WARN_ONCE(jointsErrorMsg ? gConstraintDataErrorMsg_Null_Joints : gConstraintDataErrorMsg_Null_Contacts);
+			return false;
+		}
+		// PT: I leave this codepath here for now but I think this is not needed anymore.
+		// The reserveConstraintData() calls do not return -1 anymore.
+		else if(ptr==reinterpret_cast<void*>(-1))
+		{
+			PX_WARN_ONCE(jointsErrorMsg ? gConstraintDataErrorMsg_TooLarge_Joints : gConstraintDataErrorMsg_TooLarge_Contacts);
+			return false;
+		}
+		return true;
+	}
+
+	PX_FORCE_INLINE static bool checkFrictionDataPtr(void* ptr)
+	{
+		if(NULL==ptr)
+		{
+			// PT: for friction the error msg here is similar to gConstraintDataErrorMsg_Null_Contacts
+			PX_WARN_ONCE(gConstraintDataErrorMsg_Null_Contacts);
+			return false;
+		}
+		// PT: for friction this is still needed....
+		else if(ptr==reinterpret_cast<void*>(-1))
+		{
+			PX_WARN_ONCE(gFrictionDataErrorMsg_TooLarge);
+			return false;
+		}
+		return true;
+	}
+}
+}
+
+#endif
+
--- a/engine/third_party/physx/source/lowleveldynamics/src/DyArticulationContactPrep.cpp
+++ b/engine/third_party/physx/source/lowleveldynamics/src/DyArticulationContactPrep.cpp
@@ -0,0 +1,571 @@
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+//  * Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//  * Redistributions in binary form must reproduce the above copyright
+//    notice, this list of conditions and the following disclaimer in the
+//    documentation and/or other materials provided with the distribution.
+//  * Neither the name of NVIDIA CORPORATION nor the names of its
+//    contributors may be used to endorse or promote products derived
+//    from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.  
+
+#include "foundation/PxPreprocessor.h"
+#include "foundation/PxVecMath.h"
+#include "DyCorrelationBuffer.h"
+#include "DySolverConstraintExtShared.h"
+#include "DyCpuGpuArticulation.h"
+#include "DyFeatherstoneArticulation.h"
+
+namespace physx
+{
+
+namespace Dy
+{
+
+// constraint-gen only, since these use getVelocity methods
+// which aren't valid during the solver phase
+
+//PX_INLINE void computeFrictionTangents(const aos::Vec3V& vrel,const aos::Vec3V& unitNormal, aos::Vec3V& t0, aos::Vec3V& t1)
+//{
+//	using namespace aos;
+//	//PX_ASSERT(PxAbs(unitNormal.magnitude()-1)<1e-3f);
+//
+//	t0 = V3Sub(vrel, V3Scale(unitNormal, V3Dot(unitNormal, vrel)));
+//	const FloatV ll = V3Dot(t0, t0);
+//
+//	if (FAllGrtr(ll, FLoad(1e10f)))										//can set as low as 0.
+//	{
+//		t0 = V3Scale(t0, FRsqrt(ll));
+//		t1 = V3Cross(unitNormal, t0);
+//	}
+//	else
+//		PxNormalToTangents(unitNormal, t0, t1);		//fallback
+//}
+
+PxReal SolverExtBody::projectVelocity(const PxVec3& linear, const PxVec3& angular) const
+{
+	if (mLinkIndex == PxSolverConstraintDesc::RIGID_BODY)
+	{
+		return mBodyData->projectVelocity(linear, angular);
+	}
+	else
+	{
+		const Cm::SpatialVectorV velocity = mArticulation->getLinkVelocity(mLinkIndex);
+
+		const FloatV fv = velocity.dot(Cm::SpatialVector(linear, angular));
+
+		PxF32 f;
+		FStore(fv, &f);
+		return f;
+		/*PxF32 f;
+		FStore(getVelocity(*mFsData)[mLinkIndex].dot(Cm::SpatialVector(linear, angular)), &f);
+		return f;*/
+	}
+}
+
+PxReal SolverExtBody::getCFM() const
+{
+	return (mLinkIndex == PxSolverConstraintDesc::RIGID_BODY) ? 0.f : 
+		mArticulation->getCfm(mLinkIndex);
+}
+
+aos::FloatV SolverExtBody::projectVelocity(const aos::Vec3V& linear, const aos::Vec3V& angular) const
+{
+	if (mLinkIndex == PxSolverConstraintDesc::RIGID_BODY)
+	{
+		return V3SumElems(V3Add(V3Mul(V3LoadA(mBodyData->linearVelocity), linear), V3Mul(V3LoadA(mBodyData->angularVelocity), angular)));
+	}
+	else
+	{
+		const Cm::SpatialVectorV velocity = mArticulation->getLinkVelocity(mLinkIndex);
+
+		return velocity.dot(Cm::SpatialVectorV(linear, angular));
+	}
+}
+
+
+PxVec3 SolverExtBody::getLinVel() const
+{
+	if (mLinkIndex == PxSolverConstraintDesc::RIGID_BODY)
+		return mBodyData->linearVelocity;
+	else
+	{
+		const Vec3V linear = mArticulation->getLinkVelocity(mLinkIndex).linear;
+
+		PxVec3 result;
+		V3StoreU(linear, result);
+		/*PxVec3 result;
+		V3StoreU(getVelocity(*mFsData)[mLinkIndex].linear, result);*/
+		return result;
+	}
+}
+
+PxVec3 SolverExtBody::getAngVel() const
+{
+	if(mLinkIndex == PxSolverConstraintDesc::RIGID_BODY)
+		return mBodyData->angularVelocity;
+	else
+	{
+		const Vec3V angular = mArticulation->getLinkVelocity(mLinkIndex).angular;
+		
+		PxVec3 result;
+		V3StoreU(angular, result);
+		/*PxVec3 result;
+		V3StoreU(getVelocity(*mFsData)[mLinkIndex].angular, result);*/
+		return result;
+	}
+}
+
+aos::Vec3V SolverExtBody::getLinVelV() const
+{
+	if (mLinkIndex == PxSolverConstraintDesc::RIGID_BODY)
+		return V3LoadA(mBodyData->linearVelocity);
+	else
+	{
+		return mArticulation->getLinkVelocity(mLinkIndex).linear;
+	}
+}
+
+
+Vec3V SolverExtBody::getAngVelV() const
+{
+	if (mLinkIndex == PxSolverConstraintDesc::RIGID_BODY)
+		return V3LoadA(mBodyData->angularVelocity);
+	else
+	{
+
+		return mArticulation->getLinkVelocity(mLinkIndex).angular;
+	}
+}
+
+Cm::SpatialVectorV SolverExtBody::getVelocity() const
+{
+	if(mLinkIndex == PxSolverConstraintDesc::RIGID_BODY)
+		return Cm::SpatialVectorV(V3LoadA(mBodyData->linearVelocity), V3LoadA(mBodyData->angularVelocity));
+	else
+		return mArticulation->getLinkVelocity(mLinkIndex);
+}
+
+Cm::SpatialVector createImpulseResponseVector(const PxVec3& linear, const PxVec3& angular, const SolverExtBody& body)
+{
+	if (body.mLinkIndex == PxSolverConstraintDesc::RIGID_BODY)
+	{
+		return Cm::SpatialVector(linear, body.mBodyData->sqrtInvInertia * angular);
+	}
+	return Cm::SpatialVector(linear, angular);
+}
+
+Cm::SpatialVectorV createImpulseResponseVector(const aos::Vec3V& linear, const aos::Vec3V& angular, const SolverExtBody& body)
+{
+	if (body.mLinkIndex == PxSolverConstraintDesc::RIGID_BODY)
+	{
+		return Cm::SpatialVectorV(linear, M33MulV3(M33Load(body.mBodyData->sqrtInvInertia), angular));
+	}
+	return Cm::SpatialVectorV(linear, angular);
+}
+
+PxReal getImpulseResponse(const SolverExtBody& b0, const Cm::SpatialVector& impulse0, Cm::SpatialVector& deltaV0, PxReal dom0, PxReal angDom0, 
+	const SolverExtBody& b1, const Cm::SpatialVector& impulse1, Cm::SpatialVector& deltaV1, PxReal dom1, PxReal angDom1, 
+	bool allowSelfCollision)
+{
+	PxReal response;
+	allowSelfCollision = false;
+	// right now self-collision with contacts crashes the solver
+	
+	//KS - knocked this out to save some space on SPU
+	if(allowSelfCollision && b0.mLinkIndex!=PxSolverConstraintDesc::RIGID_BODY && b0.mArticulation == b1.mArticulation && 0)
+	{
+		/*ArticulationHelper::getImpulseSelfResponse(*b0.mFsData,b0.mLinkIndex, impulse0, deltaV0, 
+													  b1.mLinkIndex, impulse1, deltaV1);*/
+
+		b0.mArticulation->getImpulseSelfResponse(b0.mLinkIndex, b1.mLinkIndex, impulse0.scale(dom0, angDom0), impulse1.scale(dom1, angDom1), 
+			deltaV0, deltaV1);
+		
+		response = impulse0.dot(deltaV0) + impulse1.dot(deltaV1);
+	}
+	else 
+	{
+		if(b0.mLinkIndex == PxSolverConstraintDesc::RIGID_BODY)
+		{
+			deltaV0.linear = impulse0.linear * b0.mBodyData->invMass * dom0;
+			deltaV0.angular = impulse0.angular * angDom0;
+		}
+		else
+		{
+			b0.mArticulation->getImpulseResponse(b0.mLinkIndex, impulse0.scale(dom0, angDom0), deltaV0);
+		}
+
+		response = impulse0.dot(deltaV0);
+		if(b1.mLinkIndex == PxSolverConstraintDesc::RIGID_BODY)
+		{
+			deltaV1.linear = impulse1.linear * b1.mBodyData->invMass * dom1;
+			deltaV1.angular = impulse1.angular * angDom1;
+		}
+		else
+		{
+			b1.mArticulation->getImpulseResponse( b1.mLinkIndex, impulse1.scale(dom1, angDom1), deltaV1);
+		}
+		response += impulse1.dot(deltaV1);
+	}
+
+	return response;
+}
+
+FloatV getImpulseResponse(const SolverExtBody& b0, const Cm::SpatialVectorV& impulse0, Cm::SpatialVectorV& deltaV0, const FloatV& dom0, const FloatV& angDom0,
+	const SolverExtBody& b1, const Cm::SpatialVectorV& impulse1, Cm::SpatialVectorV& deltaV1, const FloatV& dom1, const FloatV& angDom1,
+	bool /*allowSelfCollision*/)
+{
+	Vec3V response;
+	{
+
+		if (b0.mLinkIndex == PxSolverConstraintDesc::RIGID_BODY)
+		{
+			deltaV0.linear = V3Scale(impulse0.linear, FMul(FLoad(b0.mBodyData->invMass), dom0));
+			deltaV0.angular = V3Scale(impulse0.angular, angDom0);
+		}
+		else
+		{
+			b0.mArticulation->getImpulseResponse(b0.mLinkIndex, impulse0.scale(dom0, angDom0), deltaV0);
+		}
+
+		response = V3Add(V3Mul(impulse0.linear, deltaV0.linear), V3Mul(impulse0.angular, deltaV0.angular));
+		if (b1.mLinkIndex == PxSolverConstraintDesc::RIGID_BODY)
+		{
+			deltaV1.linear = V3Scale(impulse1.linear, FMul(FLoad(b1.mBodyData->invMass), dom1));
+			deltaV1.angular = V3Scale(impulse1.angular, angDom1);
+		}
+		else
+		{
+			b1.mArticulation->getImpulseResponse(b1.mLinkIndex, impulse1.scale(dom1, angDom1), deltaV1);
+		}
+		response = V3Add(response, V3Add(V3Mul(impulse1.linear, deltaV1.linear), V3Mul(impulse1.angular, deltaV1.angular)));
+	}
+
+	return V3SumElems(response);
+}
+
+
+
+void setupFinalizeExtSolverContacts(
+	const PxContactPoint* buffer,
+	const CorrelationBuffer& c,
+	const PxTransform& bodyFrame0,
+	const PxTransform& bodyFrame1,
+	PxU8* workspace,
+	const SolverExtBody& b0,
+	const SolverExtBody& b1,
+	const PxReal invDtF32,
+	const PxReal dtF32,
+	PxReal bounceThresholdF32,
+	PxReal invMassScale0, PxReal invInertiaScale0,
+	PxReal invMassScale1, PxReal invInertiaScale1,
+	const PxReal restDist,
+	PxU8* frictionDataPtr,
+	PxReal ccdMaxContactDist,
+	Cm::SpatialVectorF* Z,
+	const PxReal offsetSlop)
+{
+	// NOTE II: the friction patches are sparse (some of them have no contact patches, and
+	// therefore did not get written back to the cache) but the patch addresses are dense,
+	// corresponding to valid patches
+
+	/*const bool haveFriction = PX_IR(n.staticFriction) > 0 || PX_IR(n.dynamicFriction) > 0;*/
+
+	const FloatV ccdMaxSeparation = FLoad(ccdMaxContactDist);
+
+	const Vec3VArg solverOffsetSlop = V3Load(offsetSlop);
+
+	PxU8* PX_RESTRICT ptr = workspace;
+
+	const FloatV zero = FZero();
+
+	//KS - TODO - this should all be done in SIMD to avoid LHS
+	//const PxF32 maxPenBias0 = b0.mLinkIndex == PxSolverConstraintDesc::NO_LINK ? b0.mBodyData->penBiasClamp : getMaxPenBias(*b0.mFsData)[b0.mLinkIndex];
+	//const PxF32 maxPenBias1 = b1.mLinkIndex == PxSolverConstraintDesc::NO_LINK ? b1.mBodyData->penBiasClamp : getMaxPenBias(*b1.mFsData)[b1.mLinkIndex];
+
+	PxF32 maxPenBias0;
+	PxF32 maxPenBias1;
+
+	if (b0.mLinkIndex != PxSolverConstraintDesc::RIGID_BODY)
+	{
+		maxPenBias0 = b0.mArticulation->getLinkMaxPenBias(b0.mLinkIndex);
+	}
+	else
+		maxPenBias0 = b0.mBodyData->penBiasClamp;
+
+	if (b1.mLinkIndex != PxSolverConstraintDesc::RIGID_BODY)
+	{
+		maxPenBias1 = b1.mArticulation->getLinkMaxPenBias(b1.mLinkIndex);
+	}
+	else
+		maxPenBias1 = b1.mBodyData->penBiasClamp;
+
+	const FloatV maxPenBias = FLoad(PxMax(maxPenBias0, maxPenBias1));
+
+	const Vec3V frame0p = V3LoadU(bodyFrame0.p);
+	const Vec3V frame1p = V3LoadU(bodyFrame1.p);
+
+	const Cm::SpatialVectorV vel0 = b0.getVelocity();
+	const Cm::SpatialVectorV vel1 = b1.getVelocity();
+
+	const FloatV quarter = FLoad(0.25f);
+
+	const FloatV d0 = FLoad(invMassScale0);
+	const FloatV d1 = FLoad(invMassScale1);
+
+	const FloatV cfm = FLoad(PxMax(b0.getCFM(), b1.getCFM()));
+
+	const FloatV angD0 = FLoad(invInertiaScale0);
+	const FloatV angD1 = FLoad(invInertiaScale1);
+
+	Vec4V staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W = V4Zero();
+	staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W = V4SetZ(staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W, d0);
+	staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W = V4SetW(staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W, d1);
+
+	const FloatV restDistance = FLoad(restDist); 
+
+	PxU32 frictionPatchWritebackAddrIndex = 0;
+
+	PxPrefetchLine(c.contactID);
+	PxPrefetchLine(c.contactID, 128);
+
+	const FloatV invDt = FLoad(invDtF32);
+	const FloatV p8 = FLoad(0.8f);
+	const FloatV bounceThreshold = FLoad(bounceThresholdF32);
+
+	const FloatV invDtp8 = FMul(invDt, p8);
+	const FloatV dt = FLoad(dtF32);
+
+	PxU8 flags = 0;
+
+	for(PxU32 i=0;i<c.frictionPatchCount;i++)
+	{
+		PxU32 contactCount = c.frictionPatchContactCounts[i];
+		if(contactCount == 0)
+			continue;
+
+		const FrictionPatch& frictionPatch = c.frictionPatches[i];
+		PX_ASSERT(frictionPatch.anchorCount <= 2);  //0==anchorCount is allowed if all the contacts in the manifold have a large offset. 
+
+		const PxContactPoint* contactBase0 = buffer + c.contactPatches[c.correlationListHeads[i]].start;
+
+		const PxReal coefficient = (frictionPatch.anchorCount == 2) ? 0.5f : 1.f;
+
+		const PxReal staticFriction = contactBase0->staticFriction * coefficient;
+		const PxReal dynamicFriction = contactBase0->dynamicFriction * coefficient;
+		const bool disableStrongFriction = !!(contactBase0->materialFlags & PxMaterialFlag::eDISABLE_FRICTION);
+		staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W=V4SetX(staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W, FLoad(staticFriction));
+		staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W=V4SetY(staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W, FLoad(dynamicFriction));
+		const BoolV accelerationSpring = BLoad(!!(contactBase0->materialFlags & PxMaterialFlag::eCOMPLIANT_ACCELERATION_SPRING));
+	
+		SolverContactHeader* PX_RESTRICT header = reinterpret_cast<SolverContactHeader*>(ptr);
+		ptr += sizeof(SolverContactHeader);		
+
+		PxPrefetchLine(ptr + 128);
+		PxPrefetchLine(ptr + 256);
+		PxPrefetchLine(ptr + 384);
+		
+		const bool haveFriction = (disableStrongFriction == 0) ;//PX_IR(n.staticFriction) > 0 || PX_IR(n.dynamicFriction) > 0;
+		header->numNormalConstr		= PxTo8(contactCount);
+		header->numFrictionConstr	= PxTo8(haveFriction ? frictionPatch.anchorCount*2 : 0);
+	
+		header->type				= PxTo8(DY_SC_TYPE_EXT_CONTACT);
+
+		header->flags = flags;
+
+		const FloatV restitution = FLoad(contactBase0->restitution);
+		const FloatV damping = FLoad(contactBase0->damping);
+	
+		header->staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W = staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W;
+
+		header->angDom0 = invInertiaScale0;
+		header->angDom1 = invInertiaScale1;
+	
+		const PxU32 pointStride = sizeof(SolverContactPointExt);
+		const PxU32 frictionStride = sizeof(SolverContactFrictionExt);
+
+		const Vec3V normal = V3LoadU(buffer[c.contactPatches[c.correlationListHeads[i]].start].normal);
+		
+		FloatV accumImpulse = FZero();
+
+		const FloatV norVel0 = V3Dot(normal, vel0.linear);
+		const FloatV norVel1 = V3Dot(normal, vel1.linear);
+
+		for(PxU32 patch=c.correlationListHeads[i]; 
+			patch!=CorrelationBuffer::LIST_END; 
+			patch = c.contactPatches[patch].next)
+		{
+			const PxU32 count = c.contactPatches[patch].count;
+			const PxContactPoint* contactBase = buffer + c.contactPatches[patch].start;
+				
+			PxU8* p = ptr;
+
+			for(PxU32 j=0;j<count;j++)
+			{
+				const PxContactPoint& contact = contactBase[j];
+
+				SolverContactPointExt* PX_RESTRICT solverContact = reinterpret_cast<SolverContactPointExt*>(p);
+				p += pointStride;
+
+				accumImpulse = FAdd(accumImpulse, setupExtSolverContact(b0, b1, d0, d1, angD0, angD1, frame0p, frame1p, normal, invDt, invDtp8, dt, restDistance, maxPenBias, restitution,
+					bounceThreshold, contact, *solverContact, ccdMaxSeparation, Z, vel0, vel1, cfm, solverOffsetSlop, norVel0, norVel1, damping, accelerationSpring));
+			}
+
+			ptr = p;
+		}
+
+		accumImpulse = FMul(FDiv(accumImpulse, FLoad(PxF32(contactCount))), quarter);
+
+		header->normal_minAppliedImpulseForFrictionW = V4SetW(Vec4V_From_Vec3V(normal), accumImpulse);
+
+		PxF32* forceBuffer = reinterpret_cast<PxF32*>(ptr);
+		PxMemZero(forceBuffer, sizeof(PxF32) * contactCount);
+		ptr += sizeof(PxF32) * ((contactCount + 3) & (~3));
+
+		header->broken = 0;
+
+		if(haveFriction)
+		{
+			//const Vec3V normal = Vec3V_From_PxVec3(buffer.contacts[c.contactPatches[c.correlationListHeads[i]].start].normal);
+			//Vec3V normalS = V3LoadA(buffer[c.contactPatches[c.correlationListHeads[i]].start].normal);
+
+			const Vec3V linVrel = V3Sub(vel0.linear, vel1.linear);
+			//const Vec3V normal = Vec3V_From_PxVec3_Aligned(buffer.contacts[c.contactPatches[c.correlationListHeads[i]].start].normal);
+
+			const FloatV orthoThreshold = FLoad(0.70710678f);
+			const FloatV p1 = FLoad(0.0001f);
+			// fallback: normal.cross((1,0,0)) or normal.cross((0,0,1))
+			const FloatV normalX = V3GetX(normal);
+			const FloatV normalY = V3GetY(normal);
+			const FloatV normalZ = V3GetZ(normal);
+
+			Vec3V t0Fallback1 = V3Merge(zero, FNeg(normalZ), normalY);
+			Vec3V t0Fallback2 = V3Merge(FNeg(normalY), normalX, zero);
+			Vec3V t0Fallback = V3Sel(FIsGrtr(orthoThreshold, FAbs(normalX)), t0Fallback1, t0Fallback2);
+
+			Vec3V t0 = V3Sub(linVrel, V3Scale(normal, V3Dot(normal, linVrel)));
+			t0 = V3Sel(FIsGrtr(V3LengthSq(t0), p1), t0, t0Fallback);
+			t0 = V3Normalize(t0);
+
+			const VecCrossV t0Cross = V3PrepareCross(t0);
+
+			const Vec3V t1 = V3Cross(normal, t0Cross);
+			const VecCrossV t1Cross = V3PrepareCross(t1);
+			
+			//We want to set the writeBack ptr to point to the broken flag of the friction patch.
+			//On spu we have a slight problem here because the friction patch array is 
+			//in local store rather than in main memory. The good news is that the address of the friction 
+			//patch array in main memory is stored in the work unit. These two addresses will be equal 
+			//except on spu where one is local store memory and the other is the effective address in main memory.
+			//Using the value stored in the work unit guarantees that the main memory address is used on all platforms.
+			PxU8* PX_RESTRICT writeback = frictionDataPtr + frictionPatchWritebackAddrIndex*sizeof(FrictionPatch);
+
+			header->frictionBrokenWritebackByte = writeback;			
+
+			for(PxU32 j = 0; j < frictionPatch.anchorCount; j++)
+			{
+				SolverContactFrictionExt* PX_RESTRICT f0 = reinterpret_cast<SolverContactFrictionExt*>(ptr);
+				ptr += frictionStride;
+				SolverContactFrictionExt* PX_RESTRICT f1 = reinterpret_cast<SolverContactFrictionExt*>(ptr);
+				ptr += frictionStride;
+
+				Vec3V ra = V3LoadU(bodyFrame0.q.rotate(frictionPatch.body0Anchors[j]));
+				Vec3V rb = V3LoadU(bodyFrame1.q.rotate(frictionPatch.body1Anchors[j]));
+				Vec3V error = V3Sub(V3Add(ra, frame0p), V3Add(rb, frame1p));
+
+				{
+					Vec3V raXn = V3Cross(ra, t0Cross);
+					Vec3V rbXn = V3Cross(rb, t0Cross);
+					raXn = V3Sel(V3IsGrtr(solverOffsetSlop, V3Abs(raXn)), V3Zero(), raXn);
+					rbXn = V3Sel(V3IsGrtr(solverOffsetSlop, V3Abs(rbXn)), V3Zero(), rbXn);
+
+					Cm::SpatialVectorV deltaV0, deltaV1;
+
+					const Cm::SpatialVectorV resp0 = createImpulseResponseVector(t0, raXn, b0);
+					const Cm::SpatialVectorV resp1 = createImpulseResponseVector(V3Neg(t0), V3Neg(rbXn), b1);
+					FloatV resp = FAdd(cfm, getImpulseResponse(b0, resp0, deltaV0, d0, angD0,
+															 b1, resp1, deltaV1, d1, angD1, reinterpret_cast<Cm::SpatialVectorV*>(Z)));
+
+					const FloatV velMultiplier = FSel(FIsGrtr(resp, FLoad(DY_ARTICULATION_MIN_RESPONSE)), FDiv(p8, resp), zero);
+
+					const PxU32 index = c.contactPatches[c.correlationListHeads[i]].start;
+					FloatV targetVel = V3Dot(V3LoadA(buffer[index].targetVel), t0);
+
+					if(b0.mLinkIndex == PxSolverConstraintDesc::RIGID_BODY)
+						targetVel = FSub(targetVel, b0.projectVelocity(t0, raXn));
+					else if(b1.mLinkIndex == PxSolverConstraintDesc::RIGID_BODY)
+						targetVel = FAdd(targetVel, b1.projectVelocity(t0, rbXn));
+
+					f0->normalXYZ_appliedForceW = V4ClearW(Vec4V_From_Vec3V(t0));
+					f0->raXnXYZ_velMultiplierW = V4SetW(Vec4V_From_Vec3V(resp0.angular), velMultiplier);
+					f0->rbXnXYZ_biasW = V4SetW(V4Neg(Vec4V_From_Vec3V(resp1.angular)), FMul(V3Dot(t0, error), invDt));
+					f0->linDeltaVA = deltaV0.linear;
+					f0->angDeltaVA = deltaV0.angular;
+					f0->linDeltaVB = deltaV1.linear;
+					f0->angDeltaVB = deltaV1.angular;
+					FStore(targetVel, &f0->targetVel);
+				}
+
+				{
+					Vec3V raXn = V3Cross(ra, t1Cross);
+					Vec3V rbXn = V3Cross(rb, t1Cross);
+					raXn = V3Sel(V3IsGrtr(solverOffsetSlop, V3Abs(raXn)), V3Zero(), raXn);
+					rbXn = V3Sel(V3IsGrtr(solverOffsetSlop, V3Abs(rbXn)), V3Zero(), rbXn);
+
+					Cm::SpatialVectorV deltaV0, deltaV1;
+
+					const Cm::SpatialVectorV resp0 = createImpulseResponseVector(t1, raXn, b0);
+					const Cm::SpatialVectorV resp1 = createImpulseResponseVector(V3Neg(t1), V3Neg(rbXn), b1);
+
+					FloatV resp = FAdd(cfm, getImpulseResponse(b0, resp0, deltaV0, d0, angD0,
+														   b1, resp1, deltaV1, d1, angD1, reinterpret_cast<Cm::SpatialVectorV*>(Z)));
+
+					//const FloatV velMultiplier = FSel(FIsGrtr(resp, FLoad(DY_ARTICULATION_MIN_RESPONSE)), FMul(p8, FRecip(resp)), zero);
+
+					const FloatV velMultiplier = FSel(FIsGrtr(resp, FLoad(DY_ARTICULATION_MIN_RESPONSE)), FDiv(p8, resp), zero);
+
+					const PxU32 index = c.contactPatches[c.correlationListHeads[i]].start;
+					FloatV targetVel = V3Dot(V3LoadA(buffer[index].targetVel), t1);
+
+					if(b0.mLinkIndex == PxSolverConstraintDesc::RIGID_BODY)
+						targetVel = FSub(targetVel, b0.projectVelocity(t1, raXn));
+					else if(b1.mLinkIndex == PxSolverConstraintDesc::RIGID_BODY)
+						targetVel = FAdd(targetVel, b1.projectVelocity(t1, rbXn));
+
+					f1->normalXYZ_appliedForceW = V4ClearW(Vec4V_From_Vec3V(t1));
+					f1->raXnXYZ_velMultiplierW = V4SetW(Vec4V_From_Vec3V(resp0.angular), velMultiplier);
+					f1->rbXnXYZ_biasW = V4SetW(V4Neg(Vec4V_From_Vec3V(resp1.angular)), FMul(V3Dot(t1, error), invDt));
+					f1->linDeltaVA = deltaV0.linear;
+					f1->angDeltaVA = deltaV0.angular;
+					f1->linDeltaVB = deltaV1.linear;
+					f1->angDeltaVB = deltaV1.angular;
+					FStore(targetVel, &f1->targetVel);
+				}
+			}
+		}
+
+		frictionPatchWritebackAddrIndex++;
+	}
+}
+
+}
+
+
+}
--- a/engine/third_party/physx/source/lowleveldynamics/src/DyArticulationContactPrep.h
+++ b/engine/third_party/physx/source/lowleveldynamics/src/DyArticulationContactPrep.h
@@ -0,0 +1,100 @@
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+//  * Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//  * Redistributions in binary form must reproduce the above copyright
+//    notice, this list of conditions and the following disclaimer in the
+//    documentation and/or other materials provided with the distribution.
+//  * Neither the name of NVIDIA CORPORATION nor the names of its
+//    contributors may be used to endorse or promote products derived
+//    from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.  
+   
+#ifndef DY_ARTICULATION_CONTACT_PREP_H
+#define DY_ARTICULATION_CONTACT_PREP_H
+
+#include "DySolverExt.h"
+#include "foundation/PxVecMath.h"
+
+namespace physx
+{
+struct PxcNpWorkUnit;
+class PxContactBuffer;
+struct PxContactPoint;
+
+namespace Dy
+{
+
+	struct CorrelationBuffer;
+
+	PxReal getImpulseResponse(const SolverExtBody& b0, const Cm::SpatialVector& impulse0, Cm::SpatialVector& deltaV0, PxReal dom0, PxReal angDom0,
+		const SolverExtBody& b1, const Cm::SpatialVector& impulse1, Cm::SpatialVector& deltaV1, PxReal dom1, PxReal angDom1,
+		bool allowSelfCollision = false);
+
+	aos::FloatV getImpulseResponse(const SolverExtBody& b0, const Cm::SpatialVectorV& impulse0, Cm::SpatialVectorV& deltaV0, const aos::FloatV& dom0, const aos::FloatV& angDom0,
+		const SolverExtBody& b1, const Cm::SpatialVectorV& impulse1, Cm::SpatialVectorV& deltaV1, const aos::FloatV& dom1, const aos::FloatV& angDom1,
+		bool allowSelfCollision = false);
+
+	Cm::SpatialVector createImpulseResponseVector(const PxVec3& linear, const PxVec3& angular, const SolverExtBody& body);
+	Cm::SpatialVectorV createImpulseResponseVector(const aos::Vec3V& linear, const aos::Vec3V& angular, const SolverExtBody& body);
+
+	void setupFinalizeExtSolverContacts(
+		const PxContactPoint* buffer,
+		const CorrelationBuffer& c,
+		const PxTransform& bodyFrame0,
+		const PxTransform& bodyFrame1,
+		PxU8* workspace,
+		const SolverExtBody& b0,
+		const SolverExtBody& b1,
+		const PxReal invDtF32,
+		const PxReal dtF32,
+		PxReal bounceThresholdF32,
+		PxReal invMassScale0, PxReal invInertiaScale0,
+		PxReal invMassScale1, PxReal invInertiaScale1,
+		PxReal restDistance, PxU8* frictionDataPtr,
+		PxReal ccdMaxContactDist,
+		Cm::SpatialVectorF* Z,
+		const PxReal offsetSlop);
+
+
+	bool setupFinalizeExtSolverContactsCoulomb(
+		const PxContactBuffer& buffer,
+		const CorrelationBuffer& c,
+		const PxTransform& bodyFrame0,
+		const PxTransform& bodyFrame1,
+		PxU8* workspace,
+		PxReal invDt,
+		PxReal dtF32,
+		PxReal bounceThreshold,
+		const SolverExtBody& b0,
+		const SolverExtBody& b1,
+		PxU32 frictionCountPerPoint,
+		PxReal invMassScale0, PxReal invInertiaScale0,
+		PxReal invMassScale1, PxReal invInertiaScale1,
+		PxReal restDist,
+		PxReal ccdMaxContactDist,
+		Cm::SpatialVectorF* Z,
+		const PxReal solverOffsetSlop);
+
+} //namespace Dy
+
+}
+
+#endif
+
--- a/engine/third_party/physx/source/lowleveldynamics/src/DyArticulationMimicJoint.cpp
+++ b/engine/third_party/physx/source/lowleveldynamics/src/DyArticulationMimicJoint.cpp
@@ -0,0 +1,335 @@
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+//  * Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//  * Redistributions in binary form must reproduce the above copyright
+//    notice, this list of conditions and the following disclaimer in the
+//    documentation and/or other materials provided with the distribution.
+//  * Neither the name of NVIDIA CORPORATION nor the names of its
+//    contributors may be used to endorse or promote products derived
+//    from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.  
+   
+#include "DyFeatherstoneArticulation.h"
+#include "DyArticulationMimicJointCore.h"
+#include "DyCpuGpuArticulation.h"
+
+namespace physx
+{
+namespace Dy
+{
+
+/**
+\brief Compute the deltaQDot response of a joint dof to a unit joint impulse applied to that joint dof.
+\param[in] linkIndex specifies the index of the child link of the joint under consideration.
+\param[in] dof is the joint dof that will receive the test impulse.
+\param[in] artData contains pre-computed values that will be used in the computation of the joint response.
+\return The deltaQDot response of the specified joint and dof.
+\note dof is in range (0,3) because articulation joints only support 3 degrees of freedom.
+*/
+PX_INLINE PxReal computeMimicJointSelfResponse(const PxU32 linkIndex, const PxU32 dof, const ArticulationData& artData)
+{			
+	const ArticulationLink* links = artData.getLinks();
+
+	const PxU32 parentLinkIndex = links[linkIndex].parent;
+
+	//childLinkPos - parentLinkPos
+	const PxVec3& parentLinkToChildLink = artData.getRw(linkIndex);	
+
+	const PxU32 jointOffset = artData.getJointData(linkIndex).jointOffset;
+	const PxU8 dofCount = artData.getJointData(linkIndex).nbDof;
+
+	const PxReal testJointImpulses[3][3] = {{1, 0, 0}, {0, 1, 0}, {0, 0, 1}};
+	const PxReal* testJointImpulse = testJointImpulses[dof];
+
+	//(1) Propagate joint impulse (and zero link impulse) to parent
+	PxReal QMinusStZ[3] = { 0.f, 0.f, 0.f };
+	const Cm::UnAlignedSpatialVector* motionMatrixW = artData.getWorldMotionMatrix();
+	const Cm::SpatialVectorF* IsInvStISW = artData.getISInvStIS();	
+	const Cm::SpatialVectorF Zp = propagateImpulseW(
+		parentLinkToChildLink,
+		Cm::SpatialVectorF(PxVec3(0, 0,0), PxVec3(0, 0, 0)), 
+		testJointImpulse, &IsInvStISW[jointOffset], &motionMatrixW[jointOffset], dofCount, 
+		QMinusStZ);				
+
+	//(2) Get deltaV response for parent
+	const TestImpulseResponse* linkImpulseResponses = artData.getImpulseResponseMatrixWorld();
+	const Cm::SpatialVectorF deltaVParent = -linkImpulseResponses[parentLinkIndex].getLinkDeltaVImpulseResponse(Zp);
+
+	//(3) Propagate parent deltaV and apply test impulse (encoded in QMinusStZ).
+	PxReal jointDeltaQDot[3]= {0, 0, 0};
+	const InvStIs* invStIS = artData.getInvStIS();
+	const Cm::SpatialVectorF* ISW = artData.getIsW();
+	const Cm::SpatialVectorF deltaVChild = 
+		propagateAccelerationW(
+			parentLinkToChildLink, deltaVParent, 
+			invStIS[linkIndex], &motionMatrixW[jointOffset], &ISW[jointOffset], QMinusStZ, dofCount, 
+			jointDeltaQDot);
+
+	const PxReal jointSelfResponse = jointDeltaQDot[dof];
+	return jointSelfResponse;
+}
+
+/**
+\brief Compute the deltaQDot response of a joint dof given a unit impulse applied to a different joint and dof.
+\param[in] linkA is the link whose inbound joint receives the test impulse.
+\param[in] dofA is the relevant dof of the inbound joint of linkA.
+\param[in] linkB is the link whose inbound joint receives the deltaQDot arising from the unit impulse applied to the inbound joint of linkA.
+\param[in] dofB is the relevant dof of the the inbound joint of linkB.
+\param[in] artData contains pre-computed values that will be used in the computation of the joint response.
+\param[in] QMinusSTZ is used to cache Q - S^T*Z for each dof encountered when propagating from linkA to root.
+\param[in] QMinusStZLength is the length of the QMinusSTZ array ie the number of joint dofs of the articulation.
+\return The deltaQDot response of the specified joint and dof corresponding to linkB and dofB.
+\note dofA and dofB are in range (0,3) because articulation joints only support 3 degrees of freedom.
+*/
+PX_INLINE PxReal computeMimicJointCrossResponse
+(const PxU32 linkA, const PxU32 dofA, const PxU32 linkB, const PxU32 dofB, const ArticulationData& artData, 
+ PxReal* QMinusSTZ, const PxU32 QMinusStZLength)
+{
+	const ArticulationLink* links = artData.getLinks();
+
+	//Compute the test impulse to apply the inbound joint of linkA.
+	const PxReal testJointImpulses[3][3] = {{1, 0, 0}, {0, 1, 0}, {0, 0, 1}};
+	const PxReal* testJointImpulse = testJointImpulses[dofA];
+	const Cm::SpatialVectorF testLinkImpulse(PxVec3(0, 0, 0), PxVec3(0, 0, 0));
+
+	//Zero QMinusSTZ before using it.
+	PxMemZero(QMinusSTZ, sizeof(PxReal) * QMinusStZLength);
+
+	//Get the path from linkA to root to linkB.
+	const PxU32* pathToRootElements	= artData.getPathToRootElements();
+	const PxU32 numFromRootToLink = links[linkA].mPathToRootCount;
+	const PxU32* pathFromRootToLink = &pathToRootElements[links[linkA].mPathToRootStartIndex];
+	const PxU32 numFromRootToOtherLink = links[linkB].mPathToRootCount;
+	const PxU32* pathFromRootToOtherLink = &pathToRootElements[links[linkB].mPathToRootStartIndex];
+			
+	const Cm::UnAlignedSpatialVector* motionMatrixW = artData.getWorldMotionMatrix();
+	const Cm::SpatialVectorF* ISInvStISW = artData.getISInvStIS();	
+	const InvStIs* InvStISW = artData.getInvStIS();
+	const Cm::SpatialVectorF* ISW = artData.getIsW();
+
+	// Propagate test joint impulse from inbound joint of start link to its parent link.
+	// This generates a link impulse that we can propagate to root.
+	Cm::SpatialVectorF Zp;
+	{					
+		const PxU32 linkIndex = pathFromRootToLink[numFromRootToLink - 1];
+		PX_ASSERT(linkA == linkIndex);
+
+		//childLinkPos - parentLinkPos
+		const PxVec3& parentLinkToChildLink = artData.getRw(linkIndex);	
+
+		const PxU32 jointOffset = artData.getJointData(linkIndex).jointOffset;
+		const PxU8 dofCount = artData.getJointData(linkIndex).nbDof;
+
+		Zp = propagateImpulseW(
+				parentLinkToChildLink,
+				testLinkImpulse,
+				testJointImpulse, &ISInvStISW[jointOffset], &motionMatrixW[jointOffset], dofCount, 
+				&QMinusSTZ[jointOffset]);				
+	}
+				
+	//Now propagate the link impulse to root.
+	//An optimisation would be to propagate the impulse to the common ancestor of link A and link B.
+	//Let's keep it simple for the time being.
+	for(PxU32 k = 1; k < numFromRootToLink; k++)
+	{
+		const PxU32 linkIndex = pathFromRootToLink[numFromRootToLink - 1 - k];
+
+		//childLinkPos - parentLinkPos
+		const PxVec3& parentLinkToChildLink = artData.getRw(linkIndex);
+
+		const PxU32 jointOffset = artData.getJointData(linkIndex).jointOffset;
+		const PxU8 dofCount = artData.getJointData(linkIndex).nbDof;
+
+		//(1) Propagate link impulse (and zero joint impulse) to parent
+		Zp = propagateImpulseW(
+				parentLinkToChildLink,
+				Zp,
+				NULL, &ISInvStISW[jointOffset], &motionMatrixW[jointOffset], dofCount, 
+				&QMinusSTZ[jointOffset]);				
+	}
+
+	//We can now get the deltaV on the root link.
+	const TestImpulseResponse* linkImpulseResponses = artData.getImpulseResponseMatrixWorld();
+	Cm::SpatialVectorF deltaVParent = -linkImpulseResponses[0].getLinkDeltaVImpulseResponse(Zp);
+
+	//Propagate deltaV down the tree to linkB.
+	//We only care about jointVelocity of the inbound joint of linkB.
+	PxReal jointVelocity[3] = {0, 0, 0};
+	for(PxU32 k = 0; k < numFromRootToOtherLink; k++)
+	{
+		const PxU32 linkIndex = pathFromRootToOtherLink[k];
+		PX_ASSERT((0 != k) ||( 0 == links[linkIndex].parent));
+
+		//childLinkPos - parentLinkPos
+		const PxVec3& parentToChild = artData.getRw(linkIndex);
+
+		const PxU32 jointOffset = artData.getJointData(linkIndex).jointOffset;
+		const PxU8 dofCount = artData.getJointData(linkIndex).nbDof;
+
+		//Compute the jointVelocity only when we reach linkB.
+		PxReal* jointVelocityToUse = ((numFromRootToOtherLink - 1) == k) ? jointVelocity : NULL;
+
+		deltaVParent = propagateAccelerationW(
+			parentToChild, deltaVParent, 
+			InvStISW[linkIndex], &motionMatrixW[jointOffset], &ISW[jointOffset], &QMinusSTZ[jointOffset], dofCount, 
+			jointVelocityToUse); 
+	}
+
+	//Zero QMinusSTZ after using it.
+	PxMemZero(QMinusSTZ, sizeof(PxReal) * QMinusStZLength);
+
+	//Now pick out the dof associated with joint B.
+	const PxReal r = jointVelocity[dofB];
+	return r;
+}
+
+void setupMimicJointInternal
+(const ArticulationMimicJointCore& mimicJointCore, const ArticulationData& artData, 
+ PxReal* scratchBufferQMinusStZ, const PxU32 scratchBufferQMinusStZLength, 
+ ArticulationInternalMimicJoint& mimicJointInternal)
+{
+	//The coupled joints are the inbound joints of link0 and link1.
+	const PxU32 linkA = mimicJointCore.linkA;
+	const PxU32 linkB = mimicJointCore.linkB;
+ 
+	//The dof indices of the coupled joints are computed from the axes.
+	const PxU32 dofA = artData.getLink(linkA).inboundJoint->invDofIds[mimicJointCore.axisA];
+	const PxU32 dofB = artData.getLink(linkB).inboundJoint->invDofIds[mimicJointCore.axisB];
+
+	//Compute all 4 response terms.
+	const PxReal rAA = computeMimicJointSelfResponse(linkA, dofA, artData);
+	const PxReal rBB = computeMimicJointSelfResponse(linkB, dofB, artData);
+	const PxReal rBA = computeMimicJointCrossResponse(linkA, dofA, linkB, dofB, artData, scratchBufferQMinusStZ, scratchBufferQMinusStZLength);
+	const PxReal rAB = computeMimicJointCrossResponse(linkB, dofB, linkA, dofA, artData, scratchBufferQMinusStZ, scratchBufferQMinusStZLength);
+
+	//Combine all 4 response terms to compute (J * M^-1 * J^T)
+	const PxReal gearRatio = mimicJointCore.gearRatio;
+	const PxReal recipEffectiveInertia = computeRecipMimicJointEffectiveInertia(rAA, rAB, rBB, rBA, gearRatio);
+
+	//Set everything we now know about the mimic joint.
+	mimicJointInternal.gearRatio = mimicJointCore.gearRatio;
+	mimicJointInternal.offset = mimicJointCore.offset;
+	mimicJointInternal.naturalFrequency = mimicJointCore.naturalFrequency;
+	mimicJointInternal.dampingRatio = mimicJointCore.dampingRatio;
+	mimicJointInternal.linkA = mimicJointCore.linkA;
+	mimicJointInternal.linkB = mimicJointCore.linkB;
+	mimicJointInternal.dofA = dofA;
+	mimicJointInternal.dofB = dofB;
+	mimicJointInternal.recipEffectiveInertia = recipEffectiveInertia;
+}
+
+void FeatherstoneArticulation::setupInternalMimicJointConstraints()
+{
+	//Prepare the mimic joints for the solver.
+	//We need an array {Q - S^T*Z} when computing the mimic joint response terms.
+	//We should be safe to use mDeferredQstZ here because we are pre-solver.
+	//Just make sure that we zero it again before exiting so that it is zero
+	//when we get to the solver.
+	mArticulationData.mInternalMimicJoints.reserve(mArticulationData.mNbMimicJoints);
+	mArticulationData.mInternalMimicJoints.forceSize_Unsafe(mArticulationData.mNbMimicJoints);
+	for(PxU32 i = 0; i < mArticulationData.mNbMimicJoints; i++)
+	{
+		const ArticulationMimicJointCore& mimicJointCore = *mArticulationData.mMimicJoints[i];
+		ArticulationInternalMimicJoint& mimicJointInternal = mArticulationData.mInternalMimicJoints[i];
+		setupMimicJointInternal(
+			mimicJointCore, 
+			mArticulationData,
+			mArticulationData.mDeferredQstZ.begin(), mArticulationData.mDeferredQstZ.size(),
+			mimicJointInternal);
+	}//nbMimicJoints							
+}
+
+void FeatherstoneArticulation::solveInternalMimicJointConstraints(const PxReal dt, const PxReal invDt, const bool velocityIteration, const bool isTGS, const PxReal biasCoefficient)
+{
+	PX_UNUSED(dt);
+	PX_UNUSED(isTGS);
+		
+	for(PxU32 i = 0; i < mArticulationData.mNbMimicJoints; i++)
+	{
+		const ArticulationInternalMimicJoint& internalMimicJoint = mArticulationData.mInternalMimicJoints[i];
+
+		//Get the gearing ratio and offset.
+		const PxReal gearRatio = internalMimicJoint.gearRatio;
+		const PxReal offset = internalMimicJoint.offset;
+
+		//Get the compliance of the mimic joint
+		const PxReal naturalFrequency = internalMimicJoint.naturalFrequency;
+		const PxReal dampingRatio = internalMimicJoint.dampingRatio;
+
+		//Get the responses of the mimic joint.
+		const PxReal mimicJointRecipEffectiveInertia = internalMimicJoint.recipEffectiveInertia;
+
+		//Get the dofs involved in the mimic joint.
+		//We need these to work out the joint dof speeds and positions.
+		const PxU32 linkA = internalMimicJoint.linkA;
+		const PxU32 dofA = internalMimicJoint.dofA;
+		const PxU32 linkB = internalMimicJoint.linkB;
+		const PxU32 dofB = internalMimicJoint.dofB;
+
+		//Get the positions of the joint dofs coupled by the mimic joint.
+		const PxU32 jointOffsetA = mArticulationData.mLinks[linkA].inboundJoint->jointOffset;
+		const PxU32 jointOffsetB = mArticulationData.mLinks[linkB].inboundJoint->jointOffset;
+		const PxReal qA = mArticulationData.mJointPosition[jointOffsetA + dofA];
+		const PxReal qB = mArticulationData.mJointPosition[jointOffsetB + dofB];
+
+		//Get the speeds of the joint dofs coupled by the mimic joint.
+		PxReal qADot = 0;
+		PxReal qBDot = 0;
+		{
+			PxReal jointDofSpeedsA[3] = {0, 0, 0};
+			pxcFsGetVelocity(linkA, jointDofSpeedsA);
+			PxReal jointDofSpeedsB[3] = {0, 0, 0};
+			pxcFsGetVelocity(linkB, jointDofSpeedsB);
+			qADot = jointDofSpeedsA[dofA];
+			qBDot = jointDofSpeedsB[dofB];
+		}
+
+		//We can now compute the joint impulses to apply to the inbound joints of links A and B.
+		PxReal jointImpulseA[3] = {0, 0, 0};
+		PxReal jointImpulseB[3] = {0, 0, 0};
+		{			
+			PxReal jointDofImpA = 0;
+			PxReal jointdofImpB = 0;
+			computeMimicJointImpulses(
+				biasCoefficient, dt, invDt, 
+				qA, qB, qADot, qBDot, 
+				gearRatio, offset, 
+				naturalFrequency, dampingRatio, 
+				mimicJointRecipEffectiveInertia,
+				velocityIteration,
+				jointDofImpA, jointdofImpB);
+			jointImpulseA[dofA] = jointDofImpA;
+			jointImpulseB[dofB] = jointdofImpB;
+		}
+
+		//Apply the joint impulses from link to root.
+		//This will accumulate deferredQMinusSTZ for each encountered joint dof 
+		//and deferredZ for the root link.
+		const aos::Vec3V zeroImpulse = aos::V3Zero();
+		pxcFsApplyImpulses(
+			linkA, zeroImpulse, zeroImpulse, jointImpulseA,
+			linkB, zeroImpulse, zeroImpulse, jointImpulseB);
+	}
+
+}
+
+
+} //namespace Dy
+} //namespace physx
--- a/engine/third_party/physx/source/lowleveldynamics/src/DyArticulationPImpl.h
+++ b/engine/third_party/physx/source/lowleveldynamics/src/DyArticulationPImpl.h
@@ -0,0 +1,107 @@
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+//  * Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//  * Redistributions in binary form must reproduce the above copyright
+//    notice, this list of conditions and the following disclaimer in the
+//    documentation and/or other materials provided with the distribution.
+//  * Neither the name of NVIDIA CORPORATION nor the names of its
+//    contributors may be used to endorse or promote products derived
+//    from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.  
+
+#ifndef DY_ARTICULATION_INTERFACE_H
+#define DY_ARTICULATION_INTERFACE_H
+
+#include "DyFeatherstoneArticulation.h"
+
+namespace physx
+{
+namespace Dy
+{
+	struct ArticulationSolverDesc;
+
+class ArticulationPImpl
+{
+public:
+
+	static PxU32 computeUnconstrainedVelocities(const ArticulationSolverDesc& desc,
+											PxReal dt,
+											PxU32& acCount,
+											const PxVec3& gravity, 
+											const PxReal invLengthScale)
+	{
+		return FeatherstoneArticulation::computeUnconstrainedVelocities(desc, dt, acCount, gravity, invLengthScale);
+	}
+
+	static void	updateBodies(const ArticulationSolverDesc& desc, Cm::SpatialVectorF* tempDeltaV,
+						 PxReal dt)
+	{
+		FeatherstoneArticulation::updateBodies(desc, tempDeltaV, dt);
+	}
+
+	static void	updateBodiesTGS(const ArticulationSolverDesc& desc, Cm::SpatialVectorF* tempDeltaV,
+		PxReal dt)
+	{
+		FeatherstoneArticulation::updateBodiesTGS(desc, tempDeltaV, dt);
+	}
+
+	static void	saveVelocity(FeatherstoneArticulation* articulation, Cm::SpatialVectorF* tempDeltaV)
+	{
+		FeatherstoneArticulation::saveVelocity(articulation, tempDeltaV);
+	}
+
+	static void	saveVelocityTGS(FeatherstoneArticulation* articulation, PxReal invDtF32)
+	{
+		FeatherstoneArticulation::saveVelocityTGS(articulation, invDtF32);
+	}
+
+	static void computeUnconstrainedVelocitiesTGS(const ArticulationSolverDesc& desc,
+		PxReal dt,
+		const PxVec3& gravity,
+		PxReal invLengthScale,
+		bool externalForcesEveryTgsIterationEnabled)
+	{
+		FeatherstoneArticulation::computeUnconstrainedVelocitiesTGS(desc, dt, gravity, invLengthScale, externalForcesEveryTgsIterationEnabled);
+	}
+
+	static void	updateDeltaMotion(const ArticulationSolverDesc& desc, const PxReal dt, Cm::SpatialVectorF* DeltaV, const PxReal totalInvDt)
+	{
+		FeatherstoneArticulation::recordDeltaMotion(desc, dt, DeltaV, totalInvDt);
+	}
+
+	static void	deltaMotionToMotionVel(const ArticulationSolverDesc& desc, const PxReal invDt)
+	{
+		FeatherstoneArticulation::deltaMotionToMotionVelocity(desc, invDt);
+	}
+
+	static PxU32 setupSolverInternalConstraintsTGS(const ArticulationSolverDesc& desc,
+		PxReal dt,
+		PxReal invDt,
+		PxReal totalDt)
+	{
+		return FeatherstoneArticulation::setupSolverConstraintsTGS(desc, dt, invDt, totalDt);
+	}
+};
+
+
+}
+}
+#endif
+
--- a/engine/third_party/physx/source/lowleveldynamics/src/DyArticulationUtils.h
+++ b/engine/third_party/physx/source/lowleveldynamics/src/DyArticulationUtils.h
@@ -0,0 +1,48 @@
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+//  * Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//  * Redistributions in binary form must reproduce the above copyright
+//    notice, this list of conditions and the following disclaimer in the
+//    documentation and/or other materials provided with the distribution.
+//  * Neither the name of NVIDIA CORPORATION nor the names of its
+//    contributors may be used to endorse or promote products derived
+//    from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.  
+
+#ifndef DY_ARTICULATION_UTILS_H
+#define DY_ARTICULATION_UTILS_H
+
+#include "CmSpatialVector.h"
+
+namespace physx
+{
+
+namespace Dy
+{
+
+using namespace aos;
+
+PX_FORCE_INLINE Cm::SpatialVector& unsimdRef(Cm::SpatialVectorV& v)				{ return reinterpret_cast<Cm::SpatialVector&>(v); }
+PX_FORCE_INLINE const Cm::SpatialVector& unsimdRef(const Cm::SpatialVectorV& v) { return reinterpret_cast<const Cm::SpatialVector&>(v); }
+
+}
+}
+
+#endif
--- a/engine/third_party/physx/source/lowleveldynamics/src/DyBodyCoreIntegrator.h
+++ b/engine/third_party/physx/source/lowleveldynamics/src/DyBodyCoreIntegrator.h
@@ -0,0 +1,181 @@
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+//  * Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//  * Redistributions in binary form must reproduce the above copyright
+//    notice, this list of conditions and the following disclaimer in the
+//    documentation and/or other materials provided with the distribution.
+//  * Neither the name of NVIDIA CORPORATION nor the names of its
+//    contributors may be used to endorse or promote products derived
+//    from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.  
+
+#ifndef DY_BODYCORE_INTEGRATOR_H
+#define DY_BODYCORE_INTEGRATOR_H
+
+#include "PxvDynamics.h"
+#include "DySolverBody.h"
+
+namespace physx
+{
+namespace Dy
+{
+PX_FORCE_INLINE void bodyCoreComputeUnconstrainedVelocity
+	(const PxVec3& gravity, PxReal dt, PxReal linearDamping, PxReal angularDamping, PxReal accelScale, 
+	PxReal maxLinearVelocitySq, PxReal maxAngularVelocitySq, PxVec3& inOutLinearVelocity, PxVec3& inOutAngularVelocity,
+	bool disableGravity)
+{
+	//Multiply everything that needs multiplied by dt to improve code generation.
+
+	PxVec3 linearVelocity = inOutLinearVelocity;
+	PxVec3 angularVelocity = inOutAngularVelocity;
+	
+	const PxReal linearDampingTimesDT=linearDamping*dt;
+	const PxReal angularDampingTimesDT=angularDamping*dt;
+	const PxReal oneMinusLinearDampingTimesDT=1.0f-linearDampingTimesDT;
+	const PxReal oneMinusAngularDampingTimesDT=1.0f-angularDampingTimesDT;
+
+	//TODO context-global gravity
+	if(!disableGravity)
+	{
+		const PxVec3 linearAccelTimesDT = gravity*dt *accelScale;
+		linearVelocity += linearAccelTimesDT;
+	}
+
+	//Apply damping.
+	const PxReal linVelMultiplier = physx::intrinsics::fsel(oneMinusLinearDampingTimesDT, oneMinusLinearDampingTimesDT, 0.0f);
+	const PxReal angVelMultiplier = physx::intrinsics::fsel(oneMinusAngularDampingTimesDT, oneMinusAngularDampingTimesDT, 0.0f);
+	linearVelocity*=linVelMultiplier;
+	angularVelocity*=angVelMultiplier;
+
+	// Clamp velocity
+	const PxReal linVelSq = linearVelocity.magnitudeSquared();
+	if(linVelSq > maxLinearVelocitySq)
+	{
+		linearVelocity *= PxSqrt(maxLinearVelocitySq / linVelSq);
+	}
+	const PxReal angVelSq = angularVelocity.magnitudeSquared();
+	if(angVelSq > maxAngularVelocitySq)
+	{
+		angularVelocity *= PxSqrt(maxAngularVelocitySq / angVelSq);
+	}
+
+	inOutLinearVelocity = linearVelocity;
+	inOutAngularVelocity = angularVelocity;
+}
+
+PX_FORCE_INLINE void integrateCore(PxVec3& motionLinearVelocity, PxVec3& motionAngularVelocity, 
+	PxSolverBody& solverBody, PxSolverBodyData& solverBodyData, PxF32 dt, PxU32 lockFlags)
+{
+	if (lockFlags)
+	{
+		if (lockFlags & PxRigidDynamicLockFlag::eLOCK_LINEAR_X)
+		{
+			motionLinearVelocity.x = 0.f;
+			solverBody.linearVelocity.x = 0.f;
+			solverBodyData.linearVelocity.x = 0.f;
+		}
+		if (lockFlags & PxRigidDynamicLockFlag::eLOCK_LINEAR_Y)
+		{
+			motionLinearVelocity.y = 0.f;
+			solverBody.linearVelocity.y = 0.f;
+			solverBodyData.linearVelocity.y = 0.f;
+		}
+		if (lockFlags & PxRigidDynamicLockFlag::eLOCK_LINEAR_Z)
+		{
+			motionLinearVelocity.z = 0.f;
+			solverBody.linearVelocity.z = 0.f;
+			solverBodyData.linearVelocity.z = 0.f;
+		}
+		
+		//The angular velocity should be 0 because it is now impossible to make it rotate around that axis!
+		if (lockFlags & PxRigidDynamicLockFlag::eLOCK_ANGULAR_X)
+		{
+			motionAngularVelocity.x = 0.f;
+			solverBody.angularState.x = 0.f;
+		}
+		if (lockFlags & PxRigidDynamicLockFlag::eLOCK_ANGULAR_Y)
+		{
+			motionAngularVelocity.y = 0.f;
+			solverBody.angularState.y = 0.f;
+		}
+		if (lockFlags & PxRigidDynamicLockFlag::eLOCK_ANGULAR_Z)
+		{
+			motionAngularVelocity.z = 0.f;
+			solverBody.angularState.z = 0.f;
+		}
+	}
+
+	{
+		// Integrate linear part
+		const PxVec3 linearMotionVel = solverBodyData.linearVelocity + motionLinearVelocity;
+		motionLinearVelocity = linearMotionVel;
+		const PxVec3 delta = linearMotionVel * dt;
+
+		solverBodyData.body2World.p += delta;
+		PX_ASSERT(solverBodyData.body2World.p.isFinite());
+	}
+
+	{
+		PxVec3 angularMotionVel = solverBodyData.angularVelocity + solverBodyData.sqrtInvInertia * motionAngularVelocity;
+		PxReal w = angularMotionVel.magnitudeSquared();
+
+		// Integrate the rotation using closed form quaternion integrator
+		if (w != 0.0f)
+		{
+			w = PxSqrt(w);
+			// Perform a post-solver clamping
+			// TODO(dsequeira): ignore this for the moment
+			//just clamp motionVel to half float-range
+			const PxReal maxW = 1e+7f;		//Should be about sqrt(PX_MAX_REAL/2) or smaller
+			if (w > maxW)
+			{
+				angularMotionVel = angularMotionVel.getNormalized() * maxW;
+				w = maxW;
+			}
+			const PxReal v = dt * w * 0.5f;
+			PxReal s, q;
+			PxSinCos(v, s, q);
+			s /= w;
+
+			const PxVec3 pqr = angularMotionVel * s;
+			const PxQuat quatVel(pqr.x, pqr.y, pqr.z, 0);
+			PxQuat result = quatVel * solverBodyData.body2World.q;
+
+			result += solverBodyData.body2World.q * q;
+
+			solverBodyData.body2World.q = result.getNormalized();
+			PX_ASSERT(solverBodyData.body2World.q.isSane());
+			PX_ASSERT(solverBodyData.body2World.q.isFinite());
+		}
+
+		motionAngularVelocity = angularMotionVel;
+	}
+
+	{
+		//Store back the linear and angular velocities
+		//core.linearVelocity += solverBody.linearVelocity * solverBodyData.sqrtInvMass;
+		solverBodyData.linearVelocity += solverBody.linearVelocity;
+		solverBodyData.angularVelocity += solverBodyData.sqrtInvInertia * solverBody.angularState;
+	}
+}
+}
+}
+
+#endif
--- a/engine/third_party/physx/source/lowleveldynamics/src/DyConstraintPartition.cpp
+++ b/engine/third_party/physx/source/lowleveldynamics/src/DyConstraintPartition.cpp
--- a/engine/third_party/physx/source/lowleveldynamics/src/DyConstraintPartition.h
+++ b/engine/third_party/physx/source/lowleveldynamics/src/DyConstraintPartition.h
@@ -0,0 +1,95 @@
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+//  * Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//  * Redistributions in binary form must reproduce the above copyright
+//    notice, this list of conditions and the following disclaimer in the
+//    documentation and/or other materials provided with the distribution.
+//  * Neither the name of NVIDIA CORPORATION nor the names of its
+//    contributors may be used to endorse or promote products derived
+//    from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.  
+
+#ifndef DY_CONSTRAINT_PARTITION_H
+#define DY_CONSTRAINT_PARTITION_H
+
+#include "DyDynamics.h"
+#include "DyFeatherstoneArticulation.h"
+
+namespace physx
+{
+namespace Dy
+{
+// PT: input of partitionContactConstraints
+struct ConstraintPartitionIn
+{
+	ConstraintPartitionIn(	PxU8* bodies, PxU32 nbBodies, PxU32 stride,
+							Dy::FeatherstoneArticulation** articulations, PxU32 nbArticulations,
+							const PxSolverConstraintDesc* contactConstraintDescs, PxU32 nbContactConstraintDescs,
+							PxU32 maxPartitions, bool forceStaticConstraintsToSolver) :
+		mBodies	(bodies), mNumBodies(nbBodies), mStride(stride),
+		mArticulationPtrs(articulations), mNumArticulationPtrs(nbArticulations),
+		mContactConstraintDescriptors(contactConstraintDescs), mNumContactConstraintDescriptors(nbContactConstraintDescs),
+		mMaxPartitions(maxPartitions), mForceStaticConstraintsToSolver(forceStaticConstraintsToSolver)
+	{
+	}
+
+	PxU8*							mBodies;							// PT: PxSolverBody (PGS) or PxTGSSolverBodyVel (TGS)
+	PxU32							mNumBodies;
+	PxU32							mStride;
+	Dy::FeatherstoneArticulation**	mArticulationPtrs;
+	PxU32							mNumArticulationPtrs;
+	const PxSolverConstraintDesc*	mContactConstraintDescriptors;
+	PxU32							mNumContactConstraintDescriptors;
+	PxU32							mMaxPartitions;						// PT: limit the number of "resizes" beyond the initial 32
+	bool							mForceStaticConstraintsToSolver;	// PT: only for PGS + point-friction
+};
+
+// PT: output of partitionContactConstraints
+struct ConstraintPartitionOut
+{
+	ConstraintPartitionOut(PxSolverConstraintDesc* orderedContactConstraintDescriptors, PxSolverConstraintDesc* overflowConstraintDescriptors, PxArray<PxU32>* constraintsPerPartition) :
+		mOrderedContactConstraintDescriptors(orderedContactConstraintDescriptors),
+		mOverflowConstraintDescriptors(overflowConstraintDescriptors),
+		mConstraintsPerPartition(constraintsPerPartition),
+		mNumDifferentBodyConstraints(0),
+		mNumStaticConstraints(0),
+		mNumOverflowConstraints(0)
+	{
+	}
+
+	PxSolverConstraintDesc*	mOrderedContactConstraintDescriptors;
+	PxSolverConstraintDesc*	mOverflowConstraintDescriptors;
+	PxArray<PxU32>*			mConstraintsPerPartition;	// PT: actually accumulated constraints per partition
+	PxU32					mNumDifferentBodyConstraints;
+	PxU32					mNumStaticConstraints;
+	PxU32					mNumOverflowConstraints;
+};
+
+PxU32 partitionContactConstraints(ConstraintPartitionOut& out, const ConstraintPartitionIn& in);
+
+// PT: TODO: why is this only called for TGS?
+void processOverflowConstraints(PxU8* bodies, PxU32 bodyStride, PxU32 numBodies, ArticulationSolverDesc* articulations, PxU32 numArticulations,
+	PxSolverConstraintDesc* constraints, PxU32 numConstraints);
+
+} // namespace physx
+
+}
+
+#endif
--- a/engine/third_party/physx/source/lowleveldynamics/src/DyConstraintPrep.h
+++ b/engine/third_party/physx/source/lowleveldynamics/src/DyConstraintPrep.h
@@ -0,0 +1,138 @@
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+//  * Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//  * Redistributions in binary form must reproduce the above copyright
+//    notice, this list of conditions and the following disclaimer in the
+//    documentation and/or other materials provided with the distribution.
+//  * Neither the name of NVIDIA CORPORATION nor the names of its
+//    contributors may be used to endorse or promote products derived
+//    from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.  
+
+#ifndef DY_CONSTRAINT_PREP_H
+#define DY_CONSTRAINT_PREP_H
+
+#include "DyConstraint.h"
+
+#include "DySolverConstraintDesc.h"
+#include "foundation/PxArray.h"
+#include "PxConstraint.h"
+
+namespace physx
+{
+
+class PxcConstraintBlockStream;
+class PxsConstraintBlockManager;
+struct PxSolverBody;
+struct PxSolverBodyData;
+struct PxSolverConstraintDesc;
+
+namespace Cm
+{
+	struct SpatialVectorF;
+}
+
+namespace Dy
+{
+
+	static const PxU32 MAX_CONSTRAINT_ROWS = 20;
+
+	struct SolverConstraintShaderPrepDesc
+	{
+		const Constraint* constraint;
+		PxConstraintSolverPrep solverPrep;
+		const void* constantBlock;
+		PxU32 constantBlockByteSize;
+	};
+
+	SolverConstraintPrepState::Enum setupSolverConstraint4
+		(SolverConstraintShaderPrepDesc* PX_RESTRICT constraintShaderDescs,
+		PxSolverConstraintPrepDesc* PX_RESTRICT constraintDescs,
+			const PxReal dt, const PxReal recipdt, PxU32& totalRows,
+			 PxConstraintAllocator& allocator, bool residualReportingEnabled);
+
+	SolverConstraintPrepState::Enum setupSolverConstraint4
+		(PxSolverConstraintPrepDesc* PX_RESTRICT constraintDescs,
+		const PxReal dt, const PxReal recipdt, PxU32& totalRows,
+		PxConstraintAllocator& allocator, PxU32 maxRows, bool residualReportingEnabled);
+
+	PxU32 SetupSolverConstraint(SolverConstraintShaderPrepDesc& shaderDesc,
+								PxSolverConstraintPrepDesc& prepDesc,
+								   PxConstraintAllocator& allocator,
+								   PxReal dt, PxReal invdt);
+
+
+	class ConstraintHelper
+	{
+	public:
+
+		static PxU32 setupSolverConstraint(
+			PxSolverConstraintPrepDesc& prepDesc,
+			PxConstraintAllocator& allocator,
+			PxReal dt, PxReal invdt);
+	};
+
+	template<class PrepDescT>
+	PX_FORCE_INLINE void setupConstraintFlags(PrepDescT& prepDesc, PxU16 flags)
+	{
+		prepDesc.disablePreprocessing	= (flags & PxConstraintFlag::eDISABLE_PREPROCESSING)!=0;
+		prepDesc.improvedSlerp			= (flags & PxConstraintFlag::eIMPROVED_SLERP)!=0;
+		prepDesc.driveLimitsAreForces	= (flags & PxConstraintFlag::eDRIVE_LIMITS_ARE_FORCES)!=0;
+		prepDesc.extendedLimits			= (flags & PxConstraintFlag::eENABLE_EXTENDED_LIMITS)!=0;
+		prepDesc.disableConstraint		= (flags & PxConstraintFlag::eDISABLE_CONSTRAINT)!=0;
+	}
+
+	void preprocessRows(Px1DConstraint** sorted,
+		Px1DConstraint* rows,
+		PxVec4* angSqrtInvInertia0,
+		PxVec4* angSqrtInvInertia1,
+		PxU32 rowCount,
+		const PxMat33& sqrtInvInertia0F32,
+		const PxMat33& sqrtInvInertia1F32,
+		const PxReal invMass0,
+		const PxReal invMass1,
+		const PxConstraintInvMassScale& ims,
+		bool disablePreprocessing,
+		bool diagonalizeDrive);
+
+	PX_FORCE_INLINE	void setupConstraintRows(Px1DConstraint* PX_RESTRICT rows, PxU32 size)
+	{
+		// This is necessary so that there will be sensible defaults and shaders will
+		// continue to work (albeit with a recompile) if the row format changes.
+		// It's a bit inefficient because it fills in all constraint rows even if there
+		// is only going to be one generated. A way around this would be for the shader to
+		// specify the maximum number of rows it needs, or it could call a subroutine to
+		// prep the row before it starts filling it it.
+
+		PxMemZero(rows, sizeof(Px1DConstraint)*size);
+
+		for(PxU32 i=0; i<size; i++)
+		{
+			Px1DConstraint& c = rows[i];
+			//Px1DConstraintInit(c);
+			c.minImpulse = -PX_MAX_REAL;
+			c.maxImpulse = PX_MAX_REAL;
+		}
+	}
+}
+
+}
+
+#endif
--- a/engine/third_party/physx/source/lowleveldynamics/src/DyConstraintSetup.cpp
+++ b/engine/third_party/physx/source/lowleveldynamics/src/DyConstraintSetup.cpp
@@ -0,0 +1,809 @@
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+//  * Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//  * Redistributions in binary form must reproduce the above copyright
+//    notice, this list of conditions and the following disclaimer in the
+//    documentation and/or other materials provided with the distribution.
+//  * Neither the name of NVIDIA CORPORATION nor the names of its
+//    contributors may be used to endorse or promote products derived
+//    from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.  
+
+#include "foundation/PxMemory.h"
+#include "foundation/PxMathUtils.h"
+#include "DyConstraintPrep.h"
+#include "DyCpuGpuArticulation.h"
+#include "PxsRigidBody.h"
+#include "DySolverConstraint1D.h"
+#include "foundation/PxSort.h"
+#include "DySolverConstraintDesc.h"
+#include "PxcConstraintBlockStream.h"
+#include "DyArticulationContactPrep.h"
+#include "foundation/PxSIMDHelpers.h"
+#include "DyArticulationUtils.h"
+#include "DyAllocator.h"
+
+namespace physx
+{
+namespace Dy
+{
+	// dsequeira:
+	//
+	// we can choose any linear combination of equality constraints and get the same solution
+	// Hence we can orthogonalize the constraints using the inner product given by the
+	// inverse mass matrix, so that when we use PGS, solving a constraint row for a joint 
+	// don't disturb the solution of prior rows.
+	//
+	// We also eliminate the equality constraints from the hard inequality constraints - 
+	// (essentially projecting the direction corresponding to the lagrange multiplier 
+	// onto the equality constraint subspace) but 'til I've verified this generates 
+	// exactly the same KKT/complementarity conditions, status is 'experimental'. 	
+	//
+	// since for equality constraints the resulting rows have the property that applying
+	// an impulse along one row doesn't alter the projected velocity along another row, 
+	// all equality constraints (plus one inequality constraint) can be processed in parallel
+	// using SIMD
+	//
+	// Eliminating the inequality constraints from each other would require a solver change
+	// and not give us any more parallelism, although we might get better convergence.
+
+namespace
+{
+struct MassProps
+{
+	FloatV invMass0;  // the inverse mass of body0 after inverse mass scale was applied
+	FloatV invMass1;  // the inverse mass of body1 after inverse mass scale was applied
+	FloatV invInertiaScale0;
+	FloatV invInertiaScale1;
+
+	PX_FORCE_INLINE MassProps(const PxReal imass0, const PxReal imass1, const PxConstraintInvMassScale& ims) :
+		invMass0(FLoad(imass0 * ims.linear0)),
+		invMass1(FLoad(imass1 * ims.linear1)),
+		invInertiaScale0(FLoad(ims.angular0)),
+		invInertiaScale1(FLoad(ims.angular1))
+	{}
+};
+
+PX_FORCE_INLINE PxReal innerProduct(const Px1DConstraint& row0, const Px1DConstraint& row1, 
+								 const PxVec4& row0AngSqrtInvInertia0, const PxVec4& row0AngSqrtInvInertia1, 
+								 const PxVec4& row1AngSqrtInvInertia0, const PxVec4& row1AngSqrtInvInertia1, const MassProps& m)
+{
+	const Vec3V l0 = V3Mul(V3Scale(V3LoadA(row0.linear0), m.invMass0), V3LoadA(row1.linear0));
+	const Vec3V l1 = V3Mul(V3Scale(V3LoadA(row0.linear1), m.invMass1), V3LoadA(row1.linear1));
+	const Vec4V r0ang0 = V4LoadA(&row0AngSqrtInvInertia0.x);
+	const Vec4V r1ang0 = V4LoadA(&row1AngSqrtInvInertia0.x);
+	const Vec4V r0ang1 = V4LoadA(&row0AngSqrtInvInertia1.x);
+	const Vec4V r1ang1 = V4LoadA(&row1AngSqrtInvInertia1.x);
+
+	const Vec3V i0 = V3ScaleAdd(V3Mul(Vec3V_From_Vec4V(r0ang0), Vec3V_From_Vec4V(r1ang0)), m.invInertiaScale0, l0);
+	const Vec3V i1 = V3ScaleAdd(V3MulAdd(Vec3V_From_Vec4V(r0ang1), Vec3V_From_Vec4V(r1ang1), i0), m.invInertiaScale1, l1);
+	PxF32 f;
+	FStore(V3SumElems(i1), &f);
+	return f;
+}
+
+// indexed rotation around axis, with sine and cosine of half-angle
+PX_FORCE_INLINE PxQuat indexedRotation(PxU32 axis, PxReal s, PxReal c)
+{
+	PxQuat q(0,0,0,c);
+	reinterpret_cast<PxReal*>(&q)[axis] = s;
+	return q;
+}
+
+// PT: TODO: refactor with duplicate in FdMathUtils.cpp
+PxQuat diagonalize(const PxMat33& m)	// jacobi rotation using quaternions 
+{
+	const PxU32 MAX_ITERS = 5;
+
+	PxQuat q(PxIdentity);
+
+	PxMat33 d;
+	for(PxU32 i=0; i < MAX_ITERS;i++)
+	{
+		const PxMat33Padded axes(q);
+		d = axes.getTranspose() * m * axes;
+
+		const PxReal d0 = PxAbs(d[1][2]), d1 = PxAbs(d[0][2]), d2 = PxAbs(d[0][1]);
+		const PxU32 a = PxU32(d0 > d1 && d0 > d2 ? 0 : d1 > d2 ? 1 : 2);	// rotation axis index, from largest off-diagonal element
+
+		const PxU32 a1 = PxGetNextIndex3(a), a2 = PxGetNextIndex3(a1);											
+		if(d[a1][a2] == 0.0f || PxAbs(d[a1][a1] - d[a2][a2]) > 2e6f * PxAbs(2.0f * d[a1][a2]))
+			break;
+
+		const PxReal w = (d[a1][a1] - d[a2][a2]) / (2.0f * d[a1][a2]);	// cot(2 * phi), where phi is the rotation angle
+		const PxReal absw = PxAbs(w);
+
+		PxQuat r;
+		if(absw > 1000)
+			r = indexedRotation(a, 1.0f / (4.0f * w), 1.0f);	// h will be very close to 1, so use small angle approx instead
+		else
+		{
+  			const PxReal t = 1.0f / (absw + PxSqrt(w * w + 1.0f));	// absolute value of tan phi
+			const PxReal h = 1.0f / PxSqrt(t * t + 1.0f);			// absolute value of cos phi
+
+			PX_ASSERT(h != 1);	// |w|<1000 guarantees this with typical IEEE754 machine eps (approx 6e-8)
+			r = indexedRotation(a, PxSqrt((1.0f - h) / 2.0f) * PxSign(w), PxSqrt((1.0f + h) / 2.0f));
+		}
+	
+		q = (q * r).getNormalized();
+	}
+
+	return q;
+}
+
+PX_FORCE_INLINE void rescale(const Mat33V& m, PxVec3& a0, PxVec3& a1, PxVec3& a2)
+{
+	const Vec3V va0 = V3LoadU(a0);
+	const Vec3V va1 = V3LoadU(a1);
+	const Vec3V va2 = V3LoadU(a2);
+
+	const Vec3V b0 = V3ScaleAdd(va0, V3GetX(m.col0), V3ScaleAdd(va1, V3GetY(m.col0), V3Scale(va2, V3GetZ(m.col0))));
+	const Vec3V b1 = V3ScaleAdd(va0, V3GetX(m.col1), V3ScaleAdd(va1, V3GetY(m.col1), V3Scale(va2, V3GetZ(m.col1))));
+	const Vec3V b2 = V3ScaleAdd(va0, V3GetX(m.col2), V3ScaleAdd(va1, V3GetY(m.col2), V3Scale(va2, V3GetZ(m.col2))));
+
+	V3StoreU(b0, a0);
+	V3StoreU(b1, a1);
+	V3StoreU(b2, a2);
+}
+
+PX_FORCE_INLINE void rescale4(const Mat33V& m, PxReal* a0, PxReal* a1, PxReal* a2)
+{
+	const Vec4V va0 = V4LoadA(a0);
+	const Vec4V va1 = V4LoadA(a1);
+	const Vec4V va2 = V4LoadA(a2);
+
+	const Vec4V b0 = V4ScaleAdd(va0, V3GetX(m.col0), V4ScaleAdd(va1, V3GetY(m.col0), V4Scale(va2, V3GetZ(m.col0))));
+	const Vec4V b1 = V4ScaleAdd(va0, V3GetX(m.col1), V4ScaleAdd(va1, V3GetY(m.col1), V4Scale(va2, V3GetZ(m.col1))));
+	const Vec4V b2 = V4ScaleAdd(va0, V3GetX(m.col2), V4ScaleAdd(va1, V3GetY(m.col2), V4Scale(va2, V3GetZ(m.col2))));
+
+	V4StoreA(b0, a0);
+	V4StoreA(b1, a1);
+	V4StoreA(b2, a2);
+}
+
+void diagonalize(Px1DConstraint** row,
+				 PxVec4* angSqrtInvInertia0,
+				 PxVec4* angSqrtInvInertia1,
+				 const MassProps& m)
+{
+	const PxReal a00 = innerProduct(*row[0], *row[0], angSqrtInvInertia0[0], angSqrtInvInertia1[0], angSqrtInvInertia0[0], angSqrtInvInertia1[0], m);
+	const PxReal a01 = innerProduct(*row[0], *row[1], angSqrtInvInertia0[0], angSqrtInvInertia1[0], angSqrtInvInertia0[1], angSqrtInvInertia1[1], m);
+	const PxReal a02 = innerProduct(*row[0], *row[2], angSqrtInvInertia0[0], angSqrtInvInertia1[0], angSqrtInvInertia0[2], angSqrtInvInertia1[2], m);
+	const PxReal a11 = innerProduct(*row[1], *row[1], angSqrtInvInertia0[1], angSqrtInvInertia1[1], angSqrtInvInertia0[1], angSqrtInvInertia1[1], m);
+	const PxReal a12 = innerProduct(*row[1], *row[2], angSqrtInvInertia0[1], angSqrtInvInertia1[1], angSqrtInvInertia0[2], angSqrtInvInertia1[2], m);
+	const PxReal a22 = innerProduct(*row[2], *row[2], angSqrtInvInertia0[2], angSqrtInvInertia1[2], angSqrtInvInertia0[2], angSqrtInvInertia1[2], m);
+
+	const PxMat33 a(PxVec3(a00, a01, a02),
+					PxVec3(a01, a11, a12),
+					PxVec3(a02, a12, a22));
+
+	const PxQuat q = diagonalize(a);
+
+	const PxMat33 n(-q);
+
+	const Mat33V mn(V3LoadU(n.column0), V3LoadU(n.column1), V3LoadU(n.column2));
+
+	//KS - We treat as a Vec4V so that we get geometricError rescaled for free along with linear0
+	rescale4(mn, &row[0]->linear0.x, &row[1]->linear0.x, &row[2]->linear0.x);
+	rescale(mn, row[0]->linear1, row[1]->linear1, row[2]->linear1);
+	//KS - We treat as a PxVec4 so that we get velocityTarget rescaled for free 
+	rescale4(mn, &row[0]->angular0.x, &row[1]->angular0.x, &row[2]->angular0.x);
+	rescale(mn, row[0]->angular1, row[1]->angular1, row[2]->angular1);
+	rescale4(mn, &angSqrtInvInertia0[0].x, &angSqrtInvInertia0[1].x, &angSqrtInvInertia0[2].x);
+	rescale4(mn, &angSqrtInvInertia1[0].x, &angSqrtInvInertia1[1].x, &angSqrtInvInertia1[2].x);
+}
+
+//
+// A 1D constraint between two bodies (b0, b1) acts on specific linear and angular velocity
+// directions of these two bodies. Let the constrained linear velocity direction for body b0
+// be l0 and the constrained angular velocity direction be a0. Likewise, let l1 and a1 be
+// the corresponding constrained velocity directions for body b1.
+// 
+// Let the constraint Jacobian J be the 1x12 vector that combines the 3x1 vectors l0, a0, l1, a1
+// J = | l0^T, a0^T, l1^T, a1^T |
+// 
+// Let vl0, va0, vl1, va1 be the 3x1 linear/angular velocites of two bodies
+// and v be the 12x1 combination of those:
+// 
+//     | vl0 |
+// v = | va0 |
+//     | vl1 |
+//     | va1 |
+//
+// The constraint projected velocity scalar is then:
+// projV = J * v
+// 
+// Let M be the 12x12 mass matrix (with scalar masses m0, m1 and 3x3 inertias I0, I1)
+// 
+// | m0                            |
+// |    m0                         |
+// |       m0                      |
+// |         |    |                |
+// |         | I0 |                |
+// |         |    |                |
+// |                m1             |
+// |                   m1          |
+// |                      m1       |
+// |                        |    | |
+// |                        | I1 | |
+// |                        |    | |
+// 
+// Let p be the impulse scalar that results from solving the 1D constraint given
+// projV, geometric error etc.
+// Turning this impulse p to a 12x1 delta velocity vector dv:
+// 
+// dv = M^-1 * (J^T * p) =  M^-1 * J^T * p
+// 
+// Now to consider the case of multiple 1D constraints J0, J1, J2, ... operating
+// on the same body pair.
+// 
+// Let K be the matrix holding these constraints as rows
+// 
+//     |  J0  |
+// K = |  J1  |
+//     |  J2  |
+//     |  ... |
+// 
+// Applying these constraints:
+//
+// |                 |                | p0       |
+// | dv0 dv1 dv2 ... | = M^-1 * K^T * |    p1    |
+// |                 |                |       p2 |
+//
+// Let MK = (M^-1 * K^T)^T = K * M^-1  (M^-1 is symmetric). The transpose
+// is only used here to talk about constraint rows instead of columns (since
+// that expression seems to be used more commonly here).
+// 
+// dvMatrix = MK^T * pMatrix
+// 
+// The rows of MK define how an impulse affects the constrained velocity directions
+// of the two bodies. Ideally, the different constraint rows operate on independent
+// parts of the velocity such that constraints don't step on each others toe
+// (constraint A making the situation better with respect to its own constrained
+// velocity directions but worse for directions of constraint B and vice versa).
+// A formal way to specify this goal is to try to have the rows of MK be orthogonal
+// to each other, that is, to orthogonalize the MK matrix. This will eliminate
+// any action of a constraint in directions that have been touched by previous
+// constraints already. This re-configuration of constraint rows does not work in
+// general but for hard equality constraints (no spring, targetVelocity=0,
+// min/maxImpulse unlimited), changing the constraint rows to make them orthogonal
+// should not change the solution of the constraint problem. As an example, one
+// might consider a joint with two 1D constraints that lock linear movement in the
+// xy-plane (those are hard equality constraints). It's fine to choose different
+// constraint directions from the ones provided, assuming the new directions are
+// still in the xy-plane and that the geometric errors get patched up accordingly.
+// 
+// \param[in,out] row Pointers to the constraints to orthogonalize. The members
+//                    linear0/1, angular0/1, geometricError and velocityTarget will
+//                    get changed potentially
+// \param[in,out] angSqrtInvInertia0 body b0 angular velocity directions of the
+//                                   constraints provided in parameter row but
+//                                   multiplied by the square root ot the inverse
+//                                   inertia tensor of body b0.
+//                                   I0^(-1/2) * angularDirection0, ...
+//                                   Will be replaced by the orthogonalized vectors.
+//                                   Note: the fourth component of the vectors serves
+//                                   no purpose in this method.
+// \param[in,out] angSqrtInvInertia1 Same as previous parameter but for body b1.
+// \param[in] rowCount Number of entries in row, angSqrtInvInertia0, angSqrtInvInertia1
+// \param[in] eqRowCount Number of entries in row that represent equality constraints.
+//                       The method expects the entries in row to be sorted by equality
+//                       constraints first, followed by inequality constraints. The
+//                       latter get orthogonalized relative to the equality constraints
+//                       but not relative to the other inequality constraints.
+// \param[in] m	Some mass properties of the two bodies b0, b1.
+//
+void orthogonalize(Px1DConstraint** row,
+				   PxVec4* angSqrtInvInertia0,
+				   PxVec4* angSqrtInvInertia1,
+				   PxU32 rowCount,
+				   PxU32 eqRowCount,
+				   const MassProps &m)
+{
+	PX_ASSERT(eqRowCount<=6);
+
+	const FloatV zero = FZero();
+
+	Vec3V lin1m[6], ang1m[6], lin1[6], ang1[6];	
+	Vec4V lin0m[6], ang0m[6];			// must have 0 in the W-field
+	Vec4V lin0AndG[6], ang0AndT[6];
+
+	for(PxU32 i=0;i<rowCount;i++)
+	{
+		Vec4V l0AndG = V4LoadA(&row[i]->linear0.x);		// linear0 and geometric error
+		Vec4V a0AndT = V4LoadA(&row[i]->angular0.x);	// angular0 and velocity target
+
+		Vec3V l1 = Vec3V_From_Vec4V(V4LoadA(&row[i]->linear1.x));
+		Vec3V a1 = Vec3V_From_Vec4V(V4LoadA(&row[i]->angular1.x));
+
+		Vec4V angSqrtL0 = V4LoadA(&angSqrtInvInertia0[i].x);
+		Vec4V angSqrtL1 = V4LoadA(&angSqrtInvInertia1[i].x);
+
+		const PxU32 eliminationRows = PxMin<PxU32>(i, eqRowCount);
+		for(PxU32 j=0;j<eliminationRows;j++)
+		{
+			//
+			// Gram-Schmidt algorithm to get orthogonal vectors. A set of vectors
+			// v0, v1, v2..., can be turned into orthogonal vectors u0, u1, u2, ...
+			// as follows:
+			// 
+			// u0 = v0
+			// u1 = v1 - proj_u0(v1)
+			// u2 = v2 - proj_u0(v2) - proj_u1(v2)
+			// ...
+			// 
+			// proj_u(v) denotes the resulting vector when vector v gets
+			// projected onto the normalized vector u.
+			// 
+			//    __ v
+			//     /|
+			//    /
+			//   /
+			//  /
+			// ----->---------------->
+			//  proj_u(v)            u
+			//
+			// Let <v,u> be the dot/inner product of the two vectors v and u.
+			// 
+			// proj_u(v) = <v,normalize(u)> * normalize(u)
+			//           = <v,u/|u|> * (u/|u|) = <v,u> / (|u|*|u|)  *  u
+			//           = <v,u> / <u,u>  *  u
+			// 
+			// The implementation here maps as follows:
+			//
+			// u = [orthoLinear0, orthoAngular0, orthoLinear1, orthoAngular1]
+			// v = [row[]->linear0, row[]->angular0, row[]->linear1, row[]->angular1]
+			//
+			// Since the solver is using momocity, orthogonality should not be achieved for rows
+			// M^-1 * u but for rows uM = M^(-1/2) * u (with M^(-1/2) being the square root of the
+			// inverse mass matrix). Following the described orthogonalization procedure to turn
+			// v1m into u1m that is orthogonal to u0m:
+			// 
+			// u1M           = v1M - proj_u0M(v1M)
+			// M^(-1/2) * u1 = M^(-1/2) * v1  -  <v1M,u0M>/<u0M,u0M> * M^(-1/2) * u0
+			// 
+			// Since M^(-1/2) is multiplied on the left and right hand side, this can be transformed to:
+			// 
+			// u1 = v1  -  <v1M,u0M>/<u0M,u0M> * u0
+			// 
+			// For the computation of <v1M,u0M>/<u0M,u0M>, the following shall be considered:
+			//
+			// <vM,uM>
+			// = <M^(-1/2) * v, M^(-1/2) * u>
+			// = (M^(-1/2) * v)^T * (M^(-1/2) * u)  (v and u being seen as 12x1 vectors here)
+			// = v^T * M^(-1/2)^T * M^(-1/2) * u
+			// = v^T * M^-1 * u   (M^(-1/2) is a symmetric matrix, thus transposing has no effect)
+			// = <v, M^-1 * u>
+			// 
+			// Applying this:
+			// 
+			// <v1M,u0M>/<u0M,u0M> = <v1, M^-1 * u0> / <u0, M^-1 * u0>
+			//
+			// The code uses:
+			// 
+			// v1m_ = [v1Lin0, I0^(-1/2) * v1Ang0, v1Lin1, I1^(-1/2) * v1Ang1]
+			// u0m_ = [(1/m0) * u0Lin0, I0^(-1/2) * u0Ang0, (1/m1) * u0Lin1, I1^(-1/2) * u0Ang1]
+			// u0m* = u0m_ / <u0M,u0M>  (see variables named lin0m, ang0m, lin1m, ang1m in the code)
+			// 
+			// And then does:
+			// 
+			// <v1m_, u0m*> = <v1m_, u0m_> / <u0M,u0M> = <v, M^-1 * u> / <u0M,u0M> = <v1M,u0M>/<u0M,u0M>
+			// (see variable named t in the code)
+			// 
+			// note: u0, u1, ... get computed for equality constraints. Inequality constraints do not generate new
+			//       "base" vectors. Let's say u0, u1 are from equality constraints, then for inequality constraints
+			//       u2, u3:
+			// 
+			//       u2 = v2 - proj_u0(v2) - proj_u1(v2)
+			//       u3 = v3 - proj_u0(v3) - proj_u1(v3)
+			//
+			//       in other words: the inequality constraints will be orthogonal to the equality constraints but not
+			//       to other inequality constraints.
+			//
+
+			const Vec3V s0 = V3MulAdd(l1, lin1m[j], Vec3V_From_Vec4V_WUndefined(V4Mul(l0AndG, lin0m[j])));
+			const Vec3V s1 = V3MulAdd(Vec3V_From_Vec4V_WUndefined(angSqrtL1), ang1m[j], Vec3V_From_Vec4V_WUndefined(V4Mul(angSqrtL0, ang0m[j])));
+			const FloatV t = V3SumElems(V3Add(s0, s1));
+
+			l0AndG = V4NegScaleSub(lin0AndG[j], t, l0AndG);  // note: this can reduce the error term by the amount covered by the orthogonal base vectors
+			a0AndT = V4NegScaleSub(ang0AndT[j], t, a0AndT);  // note: for equality and inequality constraints, target velocity is expected to be 0
+			l1 = V3NegScaleSub(lin1[j], t, l1);
+			a1 = V3NegScaleSub(ang1[j], t, a1);
+			angSqrtL0 = V4NegScaleSub(V4LoadA(&angSqrtInvInertia0[j].x), t, angSqrtL0);
+			angSqrtL1 = V4NegScaleSub(V4LoadA(&angSqrtInvInertia1[j].x), t, angSqrtL1);
+			// note: angSqrtL1 is equivalent to I1^(-1/2) * a1 (and same goes for angSqrtL0)
+		}
+
+		V4StoreA(l0AndG, &row[i]->linear0.x);
+		V4StoreA(a0AndT, &row[i]->angular0.x);
+		V3StoreA(l1, row[i]->linear1);
+		V3StoreA(a1, row[i]->angular1);
+		V4StoreA(angSqrtL0, &angSqrtInvInertia0[i].x);
+		V4StoreA(angSqrtL1, &angSqrtInvInertia1[i].x);
+
+		if(i<eqRowCount)
+		{
+			lin0AndG[i] = l0AndG;	
+			ang0AndT[i] = a0AndT;
+			lin1[i] = l1;	
+			ang1[i] = a1;
+
+			//
+			// compute the base vector used for orthogonalization (see comments further above).
+			//
+			
+			const Vec3V l0 = Vec3V_From_Vec4V(l0AndG);
+
+			const Vec3V l0m = V3Scale(l0, m.invMass0);  // note that the invMass values used here have invMassScale applied already
+			const Vec3V l1m = V3Scale(l1, m.invMass1);
+			const Vec4V a0m = V4Scale(angSqrtL0, m.invInertiaScale0);
+			const Vec4V a1m = V4Scale(angSqrtL1, m.invInertiaScale1);
+
+			const Vec3V s0 = V3MulAdd(l0, l0m, V3Mul(l1, l1m));
+			const Vec4V s1 = V4MulAdd(a0m, angSqrtL0, V4Mul(a1m, angSqrtL1));
+			const FloatV s = V3SumElems(V3Add(s0, Vec3V_From_Vec4V_WUndefined(s1)));
+			const FloatV a = FSel(FIsGrtr(s, zero), FRecip(s), zero);	// with mass scaling, it's possible for the inner product of a row to be zero
+
+			lin0m[i] = V4Scale(V4ClearW(Vec4V_From_Vec3V(l0m)), a);	
+			ang0m[i] = V4Scale(V4ClearW(a0m), a);
+			lin1m[i] = V3Scale(l1m, a);
+			ang1m[i] = V3Scale(Vec3V_From_Vec4V_WUndefined(a1m), a);
+		}
+	}
+}
+}
+
+// PT: make sure that there's at least a PxU32 after angular0/angular1 in the Px1DConstraint structure (for safe SIMD reads)
+// Note that the code was V4LoadAding these before anyway so it must be safe already.
+// PT: removed for now because some compilers didn't like it
+//PX_COMPILE_TIME_ASSERT((sizeof(Px1DConstraint) - PX_OFFSET_OF_RT(Px1DConstraint, angular0)) >= (sizeof(PxVec3) + sizeof(PxU32)));
+//PX_COMPILE_TIME_ASSERT((sizeof(Px1DConstraint) - PX_OFFSET_OF_RT(Px1DConstraint, angular1)) >= (sizeof(PxVec3) + sizeof(PxU32)));
+
+// PT: TODO: move somewhere else
+PX_FORCE_INLINE Vec3V M33MulV4(const Mat33V& a, const Vec4V b)
+{
+	const FloatV x = V4GetX(b);
+	const FloatV y = V4GetY(b);
+	const FloatV z = V4GetZ(b);
+	const Vec3V v0 = V3Scale(a.col0, x);
+	const Vec3V v1 = V3Scale(a.col1, y);
+	const Vec3V v2 = V3Scale(a.col2, z);
+	const Vec3V v0PlusV1 = V3Add(v0, v1);
+	return V3Add(v0PlusV1, v2);
+}
+
+void preprocessRows(Px1DConstraint** sorted, 
+					Px1DConstraint* rows,
+					PxVec4* angSqrtInvInertia0,
+					PxVec4* angSqrtInvInertia1,
+					PxU32 rowCount,
+					const PxMat33& sqrtInvInertia0F32,
+					const PxMat33& sqrtInvInertia1F32,
+					const PxReal invMass0,
+					const PxReal invMass1,
+					const PxConstraintInvMassScale& ims,
+					bool disablePreprocessing,
+					bool diagonalizeDrive)
+{
+	// j is maxed at 12, typically around 7, so insertion sort is fine
+	for(PxU32 i=0; i<rowCount; i++)
+	{
+		Px1DConstraint* r = rows+i;
+		
+		PxU32 j = i;
+		for(;j>0 && r->solveHint < sorted[j-1]->solveHint; j--)
+			sorted[j] = sorted[j-1];
+
+		sorted[j] = r;
+	}
+
+	for(PxU32 i=0;i<rowCount-1;i++)
+		PX_ASSERT(sorted[i]->solveHint <= sorted[i+1]->solveHint);
+
+	// PT: it is always safe to use "V3LoadU_SafeReadW" on the two first columns of a PxMat33. However in this case the passed matrices
+	// come from PxSolverBodyData::sqrtInvInertia (PGS) or PxTGSSolverBodyTxInertia::sqrtInvInertia (TGS). It is currently unsafe to use
+	// V3LoadU_SafeReadW in the TGS case (the matrix is the last element of the structure). So we keep V3LoadU here for now. For PGS we
+	// have a compile-time-assert (see PxSolverBodyData struct) to ensure safe reads on sqrtInvInertia.
+	// Note that because we only use this in M33MulV4 below, the ClearW calls could also be skipped.
+	const Mat33V sqrtInvInertia0 = Mat33V(
+		V3LoadU_SafeReadW(sqrtInvInertia0F32.column0),
+		V3LoadU_SafeReadW(sqrtInvInertia0F32.column1),
+		V3LoadU(sqrtInvInertia0F32.column2));
+
+	const Mat33V sqrtInvInertia1 = Mat33V(
+		V3LoadU_SafeReadW(sqrtInvInertia1F32.column0),
+		V3LoadU_SafeReadW(sqrtInvInertia1F32.column1),
+		V3LoadU(sqrtInvInertia1F32.column2));
+
+	PX_ASSERT(((uintptr_t(angSqrtInvInertia0)) & 0xF) == 0);
+	PX_ASSERT(((uintptr_t(angSqrtInvInertia1)) & 0xF) == 0);
+
+	for(PxU32 i=0; i<rowCount; ++i)
+	{
+		// PT: new version is 10 instructions smaller
+		//const Vec3V angDelta0_ = M33MulV3(sqrtInvInertia0, V3LoadU(sorted[i]->angular0));
+		//const Vec3V angDelta1_ = M33MulV3(sqrtInvInertia1, V3LoadU(sorted[i]->angular1));
+		const Vec3V angDelta0 = M33MulV4(sqrtInvInertia0, V4LoadA(&sorted[i]->angular0.x));
+		const Vec3V angDelta1 = M33MulV4(sqrtInvInertia1, V4LoadA(&sorted[i]->angular1.x));
+		V4StoreA(Vec4V_From_Vec3V(angDelta0), &angSqrtInvInertia0[i].x);
+		V4StoreA(Vec4V_From_Vec3V(angDelta1), &angSqrtInvInertia1[i].x);
+	}
+
+	if(disablePreprocessing)
+		return;
+
+	MassProps m(invMass0, invMass1, ims);
+	for(PxU32 i=0;i<rowCount;)
+	{
+		const PxU32 groupMajorId = PxU32(sorted[i]->solveHint>>8), start = i++;
+		while(i<rowCount && PxU32(sorted[i]->solveHint>>8) == groupMajorId)
+			i++;
+
+		if(groupMajorId == 4 || (groupMajorId == 8))
+		{
+			//
+			// PGS:
+			// - make all equality constraints orthogonal to each other
+			// - make all inequality constraints orthogonal to all equality constraints
+			// This assumes only PxConstraintSolveHint::eEQUALITY and ::eINEQUALITY is used.
+			//
+			// TGS:
+			// - make all linear equality constraints orthogonal to each other
+			// - make all linear inequality constraints orthogonal to all linear equality constraints
+			// - make all angular equality constraints orthogonal to each other
+			// - make all angular inequality constraints orthogonal to all angular equality constraints
+			// This is achieved by internally turning PxConstraintSolveHint::eEQUALITY and ::eINEQUALITY into
+			// ::eROTATIONAL_EQUALITY and ::eROTATIONAL_INEQUALITY for angular constraints.
+			//
+
+			PxU32 bCount = start;		// count of bilateral constraints 
+			for(; bCount<i && (sorted[bCount]->solveHint&255)==0; bCount++)
+				;
+			orthogonalize(sorted+start, angSqrtInvInertia0+start, angSqrtInvInertia1+start, i-start, bCount-start, m);
+		}
+
+		if(groupMajorId == 1 && diagonalizeDrive)
+		{			
+			PxU32 slerp = start;		// count of bilateral constraints 
+			for(; slerp<i && (sorted[slerp]->solveHint&255)!=2; slerp++)
+				;
+			if(slerp+3 == i)
+				diagonalize(sorted+slerp, angSqrtInvInertia0+slerp, angSqrtInvInertia1+slerp, m);
+
+			PX_ASSERT(i-start==3);
+			diagonalize(sorted+start, angSqrtInvInertia0+start, angSqrtInvInertia1+start, m);
+		}
+	}
+}
+
+PxU32 ConstraintHelper::setupSolverConstraint(
+PxSolverConstraintPrepDesc& prepDesc,
+PxConstraintAllocator& allocator,
+PxReal simDt, PxReal recipSimDt)
+{
+	if (prepDesc.numRows == 0)
+	{
+		prepDesc.desc->constraint = NULL;
+		prepDesc.desc->writeBack = NULL;
+		prepDesc.desc->constraintLengthOver16 = 0;
+		return 0;
+	}
+
+	PxSolverConstraintDesc& desc = *prepDesc.desc;
+
+	const bool isExtended = (desc.linkIndexA != PxSolverConstraintDesc::RIGID_BODY)
+		|| (desc.linkIndexB != PxSolverConstraintDesc::RIGID_BODY);
+
+	const PxU32 stride = isExtended ? sizeof(SolverConstraint1DExt) : sizeof(SolverConstraint1D);
+	const PxU32 constraintLength = sizeof(SolverConstraint1DHeader) + stride * prepDesc.numRows;
+	
+	//KS - +16 is for the constraint progress counter, which needs to be the last element in the constraint (so that we
+	//know SPU DMAs have completed)
+	PxU8* ptr = allocator.reserveConstraintData(constraintLength + 16u);
+	if(!checkConstraintDataPtr<true>(ptr))
+		return 0;
+
+	desc.constraint = ptr;
+
+	setConstraintLength(desc,constraintLength);
+
+	desc.writeBack = prepDesc.writeback;
+
+	PxMemSet(desc.constraint, 0, constraintLength);
+
+	SolverConstraint1DHeader* header = reinterpret_cast<SolverConstraint1DHeader*>(desc.constraint);
+	PxU8* constraints = desc.constraint + sizeof(SolverConstraint1DHeader);
+	init(*header, PxTo8(prepDesc.numRows), isExtended, prepDesc.invMassScales);
+	header->body0WorldOffset = prepDesc.body0WorldOffset;
+	header->linBreakImpulse = prepDesc.linBreakForce * simDt;
+	header->angBreakImpulse = prepDesc.angBreakForce * simDt;
+	header->breakable = PxU8((prepDesc.linBreakForce != PX_MAX_F32) || (prepDesc.angBreakForce != PX_MAX_F32));
+	header->invMass0D0 = prepDesc.data0->invMass * prepDesc.invMassScales.linear0;
+	header->invMass1D1 = prepDesc.data1->invMass * prepDesc.invMassScales.linear1;
+
+	PX_ALIGN(16, PxVec4) angSqrtInvInertia0[MAX_CONSTRAINT_ROWS];
+	PX_ALIGN(16, PxVec4) angSqrtInvInertia1[MAX_CONSTRAINT_ROWS];
+	
+	Px1DConstraint* sorted[MAX_CONSTRAINT_ROWS];
+
+	preprocessRows(sorted, prepDesc.rows, angSqrtInvInertia0, angSqrtInvInertia1, prepDesc.numRows, 
+		prepDesc.data0->sqrtInvInertia, prepDesc.data1->sqrtInvInertia, prepDesc.data0->invMass, prepDesc.data1->invMass, 
+		prepDesc.invMassScales, isExtended || prepDesc.disablePreprocessing, prepDesc.improvedSlerp);
+
+	const PxReal erp = 1.0f;
+
+	PxU32 outCount = 0;
+
+	const SolverExtBody eb0(reinterpret_cast<const void*>(prepDesc.body0), prepDesc.data0, desc.linkIndexA);
+	const SolverExtBody eb1(reinterpret_cast<const void*>(prepDesc.body1), prepDesc.data1, desc.linkIndexB);
+
+	PxReal cfm = 0.f;
+	if (isExtended)
+	{
+		cfm = PxMax(eb0.getCFM(), eb1.getCFM());
+	}
+
+	for (PxU32 i = 0; i<prepDesc.numRows; i++)
+	{
+		PxPrefetchLine(constraints, 128);
+		SolverConstraint1D& s = *reinterpret_cast<SolverConstraint1D *>(constraints);
+		Px1DConstraint& c = *sorted[i];
+
+		PxReal minImpulse, maxImpulse;
+		computeMinMaxImpulseOrForceAsImpulse(
+			c.minImpulse, c.maxImpulse,
+			c.flags & Px1DConstraintFlag::eHAS_DRIVE_LIMIT, prepDesc.driveLimitsAreForces, simDt,
+			minImpulse, maxImpulse);
+
+		PxReal unitResponse;
+		PxReal jointSpeedForRestitutionBounce = 0.0f;
+		PxReal initJointSpeed = 0.0f;
+
+		const PxReal minResponseThreshold = prepDesc.minResponseThreshold;
+
+		if(!isExtended)
+		{
+			//The theoretical formulation of the Jacobian J has 4 terms {linear0, angular0, linear1, angular1}
+			//s.lin0 and s.lin1 match J.linear0 and J.linear1.
+			//We compute s.ang0 and s.ang1 but these *must not* be confused with the angular terms of the theoretical Jacobian.
+			//s.ang0 and s.ang1 are part of the momocity system that computes deltas to the angular motion that are neither
+			//angular momentum nor angular velocity.
+			//s.ang0 = I0^(-1/2) * J.angular0 with I0 denoting the inertia of body0 in the world frame.
+			//s.ang1 = I1^(-1/2) * J.angular1 with I1 denoting the inertia of body1 in the world frame.
+			//We then compute the unit response r = J * M^-1 * JTranspose with M denoting the mass matrix.
+			//r = (1/m0)*|J.linear0|^2 + (1/m1)*|J.linear1|^2 + J.angular0 * I0^-1 * J.angular0 +  J.angular1 * I1^-1 * J.angular1
+			//We can write out the term [J.angular0 * I0^-1 * J.angular0] in a different way:
+			//J.angular0 * I0^-1 * J.angular0 = [J.angular0 * I0^(-1/2)] dot  [I0^(-1/2) * J.angular0]
+			//Noting that s.ang0 =  J.angular0 * I0^(-1/2) and the equivalent expression for body 1, we have the following:
+			//r = 	(1/m0)*|s.lin0|^2 + (1/m1)*|s.lin1|^2 + |s.ang0|^2 + |s.ang1|^2 
+			//Init vel is computed using the standard Jacobian method because at this stage we have linear and angular velocities.
+			//The code that resolves the constraints instead accumulates delta linear velocities and delta angular momocities compatible with 
+			//s.lin0/lin1 and s.ang0/ang1. Right now, though, we have only linear and angular velocities compatible with the theoretical
+			//Jacobian form:
+			//initVel = J.linear0.dot(linVel0) + J.angular0.dot(angvel0) - J.linear1.dot(linVel1) - J.angular1.dot(angvel1) 
+			init(s, c.linear0, c.linear1, PxVec3(angSqrtInvInertia0[i].x, angSqrtInvInertia0[i].y, angSqrtInvInertia0[i].z),
+				PxVec3(angSqrtInvInertia1[i].x, angSqrtInvInertia1[i].y, angSqrtInvInertia1[i].z), minImpulse, maxImpulse);
+			s.ang0Writeback = c.angular0;
+			const PxReal resp0 = s.lin0.magnitudeSquared() * prepDesc.data0->invMass * prepDesc.invMassScales.linear0 + s.ang0.magnitudeSquared() * prepDesc.invMassScales.angular0;
+			const PxReal resp1 = s.lin1.magnitudeSquared() * prepDesc.data1->invMass * prepDesc.invMassScales.linear1 + s.ang1.magnitudeSquared() * prepDesc.invMassScales.angular1;
+			unitResponse = resp0 + resp1;
+			initJointSpeed = jointSpeedForRestitutionBounce = prepDesc.data0->projectVelocity(c.linear0, c.angular0) - prepDesc.data1->projectVelocity(c.linear1, c.angular1);
+		}
+		else
+		{
+			//this is articulation/deformable volume
+			init(s, c.linear0, c.linear1, c.angular0, c.angular1, minImpulse, maxImpulse);
+			SolverConstraint1DExt& e = static_cast<SolverConstraint1DExt&>(s);
+
+			const Cm::SpatialVector resp0 = createImpulseResponseVector(e.lin0, e.ang0, eb0);
+			const Cm::SpatialVector resp1 = createImpulseResponseVector(-e.lin1, -e.ang1, eb1);
+			unitResponse = getImpulseResponse(eb0, resp0, unsimdRef(e.deltaVA), prepDesc.invMassScales.linear0, prepDesc.invMassScales.angular0,
+				eb1, resp1, unsimdRef(e.deltaVB), prepDesc.invMassScales.linear1, prepDesc.invMassScales.angular1, false);
+
+			//Add CFM term!
+
+			if(unitResponse <= DY_ARTICULATION_MIN_RESPONSE)
+				continue;
+			unitResponse += cfm;
+			
+			s.ang0Writeback = c.angular0;
+			s.lin0 = resp0.linear;
+			s.ang0 = resp0.angular;
+			s.lin1 = -resp1.linear;
+			s.ang1 = -resp1.angular;
+			PxReal vel0, vel1;
+			const bool b0IsRigidDynamic = (eb0.mLinkIndex == PxSolverConstraintDesc::RIGID_BODY);
+			const bool b1IsRigidDynamic = (eb1.mLinkIndex == PxSolverConstraintDesc::RIGID_BODY);
+			if(needsNormalVel(c) || b0IsRigidDynamic || b1IsRigidDynamic)
+			{
+				vel0 = eb0.projectVelocity(c.linear0, c.angular0);
+				vel1 = eb1.projectVelocity(c.linear1, c.angular1);
+
+				Dy::computeJointSpeedPGS(vel0, b0IsRigidDynamic, vel1, b1IsRigidDynamic, jointSpeedForRestitutionBounce, initJointSpeed);
+			}
+
+			//minResponseThreshold = PxMax(minResponseThreshold, DY_ARTICULATION_MIN_RESPONSE);
+		}
+
+		const PxReal recipUnitResponse = computeRecipUnitResponse(unitResponse, minResponseThreshold);
+		s.setSolverConstants(
+			compute1dConstraintSolverConstantsPGS(
+			c.flags, 
+			c.mods.spring.stiffness, c.mods.spring.damping, 
+			c.mods.bounce.restitution, c.mods.bounce.velocityThreshold, 
+			c.geometricError, c.velocityTarget,
+			jointSpeedForRestitutionBounce, initJointSpeed, 
+			unitResponse, recipUnitResponse, 
+			erp, 
+			simDt, recipSimDt));
+
+		if(c.flags & Px1DConstraintFlag::eOUTPUT_FORCE)
+			s.setOutputForceFlag(true);
+
+		outCount++;
+
+		constraints += stride;
+	}
+
+	//Reassign count to the header because we may have skipped some rows if they were degenerate
+	header->count = PxU8(outCount);
+	return prepDesc.numRows;
+}
+
+PxU32 SetupSolverConstraint(SolverConstraintShaderPrepDesc& shaderDesc,
+	PxSolverConstraintPrepDesc& prepDesc,
+	PxConstraintAllocator& allocator,
+	PxReal dt, PxReal invdt)
+{
+	// LL shouldn't see broken constraints
+	
+	PX_ASSERT(!(reinterpret_cast<ConstraintWriteback*>(prepDesc.writeback)->isBroken()));
+
+	setConstraintLength(*prepDesc.desc, 0);
+
+	if (!shaderDesc.solverPrep)
+		return 0;
+
+	//PxU32 numAxisConstraints = 0;
+
+	Px1DConstraint rows[MAX_CONSTRAINT_ROWS];
+	setupConstraintRows(rows, MAX_CONSTRAINT_ROWS);
+
+	prepDesc.invMassScales.linear0 = prepDesc.invMassScales.linear1 = prepDesc.invMassScales.angular0 = prepDesc.invMassScales.angular1 = 1.0f;
+	prepDesc.body0WorldOffset = PxVec3(0.0f);
+
+	PxVec3p unused_ra, unused_rb;
+
+	//TAG::solverprepcall
+	prepDesc.numRows = prepDesc.disableConstraint ? 0 : (*shaderDesc.solverPrep)(rows,
+		prepDesc.body0WorldOffset,
+		MAX_CONSTRAINT_ROWS,
+		prepDesc.invMassScales,
+		shaderDesc.constantBlock,
+		prepDesc.bodyFrame0, prepDesc.bodyFrame1, prepDesc.extendedLimits, unused_ra, unused_rb);
+
+	prepDesc.rows = rows;
+
+	return ConstraintHelper::setupSolverConstraint(prepDesc, allocator, dt, invdt);
+}
+
+}
+
+}
--- a/engine/third_party/physx/source/lowleveldynamics/src/DyConstraintSetupBlock.cpp
+++ b/engine/third_party/physx/source/lowleveldynamics/src/DyConstraintSetupBlock.cpp
@@ -0,0 +1,514 @@
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+//  * Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//  * Redistributions in binary form must reproduce the above copyright
+//    notice, this list of conditions and the following disclaimer in the
+//    documentation and/or other materials provided with the distribution.
+//  * Neither the name of NVIDIA CORPORATION nor the names of its
+//    contributors may be used to endorse or promote products derived
+//    from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.  
+
+#include "foundation/PxMemory.h"
+#include "DyConstraintPrep.h"
+#include "PxsRigidBody.h"
+#include "DySolverConstraint1D.h"
+#include "DySolverConstraint1D4.h"
+#include "foundation/PxSort.h"
+#include "PxcConstraintBlockStream.h"
+#include "DyArticulationContactPrep.h"
+#include "DyAllocator.h"
+
+namespace physx
+{
+
+using namespace aos;
+
+namespace Dy
+{
+
+SolverConstraintPrepState::Enum setupSolverConstraint4
+		(PxSolverConstraintPrepDesc* PX_RESTRICT constraintDescs,
+		const PxReal dt, const PxReal recipdt, PxU32& totalRows,
+		PxConstraintAllocator& allocator, PxU32 maxRows, bool residualReportingEnabled);
+
+SolverConstraintPrepState::Enum setupSolverConstraint4
+(SolverConstraintShaderPrepDesc* PX_RESTRICT constraintShaderDescs,
+PxSolverConstraintPrepDesc* PX_RESTRICT constraintDescs,
+const PxReal dt, const PxReal recipdt, PxU32& totalRows,
+PxConstraintAllocator& allocator, bool residualReportingEnabled)
+
+{
+	//KS - we will never get here with constraints involving articulations so we don't need to stress about those in here
+
+	totalRows = 0;
+
+	Px1DConstraint allRows[MAX_CONSTRAINT_ROWS * 4];
+	Px1DConstraint* rows = allRows;
+	Px1DConstraint* rows2 = allRows;
+	
+	PxU32 maxRows = 0;
+	PxU32 nbToPrep = MAX_CONSTRAINT_ROWS;
+
+	for (PxU32 a = 0; a < 4; ++a)
+	{
+		SolverConstraintShaderPrepDesc& shaderDesc = constraintShaderDescs[a];
+		PxSolverConstraintPrepDesc& desc = constraintDescs[a];
+
+		if (!shaderDesc.solverPrep)
+			return SolverConstraintPrepState::eUNBATCHABLE;
+
+		PX_ASSERT(rows2 + nbToPrep <= allRows + MAX_CONSTRAINT_ROWS*4);
+		setupConstraintRows(rows2, nbToPrep);
+		rows2 += nbToPrep;
+
+		desc.invMassScales.linear0 = desc.invMassScales.linear1 = desc.invMassScales.angular0 = desc.invMassScales.angular1 = 1.0f;
+		desc.body0WorldOffset = PxVec3(0.0f);
+
+		PxVec3p unused_ra, unused_rb;
+
+		//TAG:solverprepcall
+		const PxU32 constraintCount = desc.disableConstraint ? 0 : (*shaderDesc.solverPrep)(rows,
+			desc.body0WorldOffset,
+			MAX_CONSTRAINT_ROWS,
+			desc.invMassScales,
+			shaderDesc.constantBlock,
+			desc.bodyFrame0, desc.bodyFrame1, desc.extendedLimits, unused_ra, unused_rb);
+
+		nbToPrep = constraintCount;
+		maxRows = PxMax(constraintCount, maxRows);
+
+		if (constraintCount == 0)
+			return SolverConstraintPrepState::eUNBATCHABLE;
+
+		desc.rows = rows;
+		desc.numRows = constraintCount;
+		rows += constraintCount;
+	}
+
+	return setupSolverConstraint4(constraintDescs, dt, recipdt, totalRows, allocator, maxRows, residualReportingEnabled);
+}
+
+SolverConstraintPrepState::Enum setupSolverConstraint4
+(PxSolverConstraintPrepDesc* PX_RESTRICT constraintDescs,
+const PxReal simDt, const PxReal recipSimDt, PxU32& totalRows,
+PxConstraintAllocator& allocator, PxU32 maxRows, bool residualReportingEnabled)
+{
+	const Vec4V zero = V4Zero();
+	Px1DConstraint* allSorted[MAX_CONSTRAINT_ROWS * 4];
+	PxU32 startIndex[4];
+	PX_ALIGN(16, PxVec4) angSqrtInvInertia0[MAX_CONSTRAINT_ROWS * 4];
+	PX_ALIGN(16, PxVec4) angSqrtInvInertia1[MAX_CONSTRAINT_ROWS * 4];
+
+	PxU32 numRows = 0;
+
+	for (PxU32 a = 0; a < 4; ++a)
+	{
+		startIndex[a] = numRows;
+		PxSolverConstraintPrepDesc& desc = constraintDescs[a];
+		Px1DConstraint** sorted = allSorted + numRows;
+
+		preprocessRows(sorted, desc.rows, angSqrtInvInertia0 + numRows, angSqrtInvInertia1 + numRows, desc.numRows, 
+			desc.data0->sqrtInvInertia, desc.data1->sqrtInvInertia, desc.data0->invMass, desc.data1->invMass, 
+			desc.invMassScales, desc.disablePreprocessing, desc.improvedSlerp);
+
+		numRows += desc.numRows;
+	}
+
+	const PxU32 stride = residualReportingEnabled ? sizeof(SolverConstraint1DDynamic4WithResidual) : sizeof(SolverConstraint1DDynamic4);
+
+	const PxU32 constraintLength = sizeof(SolverConstraint1DHeader4) + stride * maxRows;
+
+	//KS - +16 is for the constraint progress counter, which needs to be the last element in the constraint (so that we
+	//know SPU DMAs have completed)
+	PxU8* ptr = allocator.reserveConstraintData(constraintLength + 16u);
+	if(!checkConstraintDataPtr<true>(ptr))
+	{
+		for(PxU32 a = 0; a < 4; ++a)
+		{
+			PxSolverConstraintPrepDesc& desc = constraintDescs[a];
+			desc.desc->constraint = NULL;
+			setConstraintLength(*desc.desc, 0);
+			desc.desc->writeBack = desc.writeback;
+		}
+
+		return SolverConstraintPrepState::eOUT_OF_MEMORY;
+	}
+
+	//desc.constraint = ptr;
+
+	totalRows = numRows;
+
+	for(PxU32 a = 0; a < 4; ++a)
+	{
+		PxSolverConstraintPrepDesc& desc = constraintDescs[a];
+		desc.desc->constraint = ptr;
+		setConstraintLength(*desc.desc, constraintLength);
+		desc.desc->writeBack = desc.writeback;
+	}
+
+	const PxReal erp[4] = { 1.0f, 1.0f, 1.0f, 1.0f};
+	//OK, now we build all 4 constraints into a single set of rows
+
+	{
+		PxU8* currPtr = ptr;
+		SolverConstraint1DHeader4* header = reinterpret_cast<SolverConstraint1DHeader4*>(currPtr);
+		currPtr += sizeof(SolverConstraint1DHeader4);
+
+		const PxSolverBodyData& bd00 = *constraintDescs[0].data0;
+		const PxSolverBodyData& bd01 = *constraintDescs[1].data0;
+		const PxSolverBodyData& bd02 = *constraintDescs[2].data0;
+		const PxSolverBodyData& bd03 = *constraintDescs[3].data0;
+
+		const PxSolverBodyData& bd10 = *constraintDescs[0].data1;
+		const PxSolverBodyData& bd11 = *constraintDescs[1].data1;
+		const PxSolverBodyData& bd12 = *constraintDescs[2].data1;
+		const PxSolverBodyData& bd13 = *constraintDescs[3].data1;
+
+		//Load up masses, invInertia, velocity etc.
+
+		const Vec4V invMassScale0 = V4LoadXYZW(constraintDescs[0].invMassScales.linear0, constraintDescs[1].invMassScales.linear0, 
+			constraintDescs[2].invMassScales.linear0, constraintDescs[3].invMassScales.linear0);
+		const Vec4V invMassScale1 = V4LoadXYZW(constraintDescs[0].invMassScales.linear1, constraintDescs[1].invMassScales.linear1, 
+			constraintDescs[2].invMassScales.linear1, constraintDescs[3].invMassScales.linear1);
+
+		const Vec4V iMass0 = V4LoadXYZW(bd00.invMass, bd01.invMass, bd02.invMass, bd03.invMass);
+
+		const Vec4V iMass1 = V4LoadXYZW(bd10.invMass, bd11.invMass, bd12.invMass, bd13.invMass);
+
+		const Vec4V invMass0 = V4Mul(iMass0, invMassScale0);
+		const Vec4V invMass1 = V4Mul(iMass1, invMassScale1);
+
+		const Vec4V invInertiaScale0 = V4LoadXYZW(constraintDescs[0].invMassScales.angular0, constraintDescs[1].invMassScales.angular0, 
+			constraintDescs[2].invMassScales.angular0, constraintDescs[3].invMassScales.angular0);
+		const Vec4V invInertiaScale1 = V4LoadXYZW(constraintDescs[0].invMassScales.angular1, constraintDescs[1].invMassScales.angular1, 
+			constraintDescs[2].invMassScales.angular1, constraintDescs[3].invMassScales.angular1);
+
+		//Velocities
+		Vec4V linVel00 = V4LoadA(&bd00.linearVelocity.x);
+		Vec4V linVel01 = V4LoadA(&bd10.linearVelocity.x);
+		Vec4V angVel00 = V4LoadA(&bd00.angularVelocity.x);
+		Vec4V angVel01 = V4LoadA(&bd10.angularVelocity.x);
+
+		Vec4V linVel10 = V4LoadA(&bd01.linearVelocity.x);
+		Vec4V linVel11 = V4LoadA(&bd11.linearVelocity.x);
+		Vec4V angVel10 = V4LoadA(&bd01.angularVelocity.x);
+		Vec4V angVel11 = V4LoadA(&bd11.angularVelocity.x);
+
+		Vec4V linVel20 = V4LoadA(&bd02.linearVelocity.x);
+		Vec4V linVel21 = V4LoadA(&bd12.linearVelocity.x);
+		Vec4V angVel20 = V4LoadA(&bd02.angularVelocity.x);
+		Vec4V angVel21 = V4LoadA(&bd12.angularVelocity.x);
+
+		Vec4V linVel30 = V4LoadA(&bd03.linearVelocity.x);
+		Vec4V linVel31 = V4LoadA(&bd13.linearVelocity.x);
+		Vec4V angVel30 = V4LoadA(&bd03.angularVelocity.x);
+		Vec4V angVel31 = V4LoadA(&bd13.angularVelocity.x);
+
+		Vec4V linVel0T0, linVel0T1, linVel0T2;
+		Vec4V linVel1T0, linVel1T1, linVel1T2;
+		Vec4V angVel0T0, angVel0T1, angVel0T2;
+		Vec4V angVel1T0, angVel1T1, angVel1T2;
+
+		PX_TRANSPOSE_44_34(linVel00, linVel10, linVel20, linVel30, linVel0T0, linVel0T1, linVel0T2);
+		PX_TRANSPOSE_44_34(linVel01, linVel11, linVel21, linVel31, linVel1T0, linVel1T1, linVel1T2);
+		PX_TRANSPOSE_44_34(angVel00, angVel10, angVel20, angVel30, angVel0T0, angVel0T1, angVel0T2);
+		PX_TRANSPOSE_44_34(angVel01, angVel11, angVel21, angVel31, angVel1T0, angVel1T1, angVel1T2);
+
+		//body world offsets
+		Vec4V workOffset0 = V4LoadU(&constraintDescs[0].body0WorldOffset.x);
+		Vec4V workOffset1 = V4LoadU(&constraintDescs[1].body0WorldOffset.x);
+		Vec4V workOffset2 = V4LoadU(&constraintDescs[2].body0WorldOffset.x);
+		Vec4V workOffset3 = V4LoadU(&constraintDescs[3].body0WorldOffset.x);
+
+		Vec4V workOffsetX, workOffsetY, workOffsetZ;
+
+		PX_TRANSPOSE_44_34(workOffset0, workOffset1, workOffset2, workOffset3, workOffsetX, workOffsetY, workOffsetZ);
+
+		const FloatV dtV = FLoad(simDt);
+		const Vec4V linBreakForce = V4LoadXYZW(	constraintDescs[0].linBreakForce, constraintDescs[1].linBreakForce,
+												constraintDescs[2].linBreakForce, constraintDescs[3].linBreakForce);
+		const Vec4V angBreakForce = V4LoadXYZW(	constraintDescs[0].angBreakForce, constraintDescs[1].angBreakForce,
+												constraintDescs[2].angBreakForce, constraintDescs[3].angBreakForce);
+
+		header->break0 = PxU8((constraintDescs[0].linBreakForce != PX_MAX_F32) || (constraintDescs[0].angBreakForce != PX_MAX_F32));
+		header->break1 = PxU8((constraintDescs[1].linBreakForce != PX_MAX_F32) || (constraintDescs[1].angBreakForce != PX_MAX_F32));
+		header->break2 = PxU8((constraintDescs[2].linBreakForce != PX_MAX_F32) || (constraintDescs[2].angBreakForce != PX_MAX_F32));
+		header->break3 = PxU8((constraintDescs[3].linBreakForce != PX_MAX_F32) || (constraintDescs[3].angBreakForce != PX_MAX_F32));
+
+		//OK, I think that's everything loaded in
+
+		header->invMass0D0 = invMass0;
+		header->invMass1D1 = invMass1;
+		header->angD0 = invInertiaScale0;
+		header->angD1 = invInertiaScale1;
+		header->body0WorkOffsetX = workOffsetX;
+		header->body0WorkOffsetY = workOffsetY;
+		header->body0WorkOffsetZ = workOffsetZ;
+
+		header->count = maxRows;
+		header->type = DY_SC_TYPE_BLOCK_1D;
+		header->linBreakImpulse = V4Scale(linBreakForce, dtV);
+		header->angBreakImpulse = V4Scale(angBreakForce, dtV);
+		header->count0 = PxTo8(constraintDescs[0].numRows);
+		header->count1 = PxTo8(constraintDescs[1].numRows);
+		header->count2 = PxTo8(constraintDescs[2].numRows);
+		header->count3 = PxTo8(constraintDescs[3].numRows);
+
+		//Now we loop over the constraints and build the results...
+
+		PxU32 index0 = 0;
+		PxU32 endIndex0 = constraintDescs[0].numRows - 1;
+		PxU32 index1 = startIndex[1];
+		PxU32 endIndex1 = index1 + constraintDescs[1].numRows - 1;
+		PxU32 index2 = startIndex[2];
+		PxU32 endIndex2 = index2 + constraintDescs[2].numRows - 1;
+		PxU32 index3 = startIndex[3];
+		PxU32 endIndex3 = index3 + constraintDescs[3].numRows - 1;
+
+		for(PxU32 a = 0; a < maxRows; ++a)
+		{	
+			SolverConstraint1DDynamic4* c = reinterpret_cast<SolverConstraint1DDynamic4*>(currPtr);
+			currPtr += stride;
+
+			Px1DConstraint* con0 = allSorted[index0];
+			Px1DConstraint* con1 = allSorted[index1];
+			Px1DConstraint* con2 = allSorted[index2];
+			Px1DConstraint* con3 = allSorted[index3];
+
+			Vec4V cangDelta00 = V4LoadA(&angSqrtInvInertia0[index0].x);
+			Vec4V cangDelta01 = V4LoadA(&angSqrtInvInertia0[index1].x);
+			Vec4V cangDelta02 = V4LoadA(&angSqrtInvInertia0[index2].x);
+			Vec4V cangDelta03 = V4LoadA(&angSqrtInvInertia0[index3].x);
+
+			Vec4V cangDelta10 = V4LoadA(&angSqrtInvInertia1[index0].x);
+			Vec4V cangDelta11 = V4LoadA(&angSqrtInvInertia1[index1].x);
+			Vec4V cangDelta12 = V4LoadA(&angSqrtInvInertia1[index2].x);
+			Vec4V cangDelta13 = V4LoadA(&angSqrtInvInertia1[index3].x);
+
+			index0 = index0 == endIndex0 ? index0 : index0 + 1;
+			index1 = index1 == endIndex1 ? index1 : index1 + 1;
+			index2 = index2 == endIndex2 ? index2 : index2 + 1;
+			index3 = index3 == endIndex3 ? index3 : index3 + 1;
+
+			PxReal minImpulse0, minImpulse1, minImpulse2, minImpulse3;
+			PxReal maxImpulse0, maxImpulse1, maxImpulse2, maxImpulse3;
+			Dy::computeMinMaxImpulseOrForceAsImpulse(
+				con0->minImpulse, con0->maxImpulse,		
+				con0->flags & Px1DConstraintFlag::eHAS_DRIVE_LIMIT, constraintDescs[0].driveLimitsAreForces, simDt,
+				minImpulse0, maxImpulse0);
+			Dy::computeMinMaxImpulseOrForceAsImpulse(
+				con1->minImpulse, con1->maxImpulse,		
+				con1->flags & Px1DConstraintFlag::eHAS_DRIVE_LIMIT, constraintDescs[1].driveLimitsAreForces, simDt,
+				minImpulse1, maxImpulse1);
+			Dy::computeMinMaxImpulseOrForceAsImpulse(
+				con2->minImpulse, con2->maxImpulse,		
+				con2->flags & Px1DConstraintFlag::eHAS_DRIVE_LIMIT, constraintDescs[2].driveLimitsAreForces, simDt,
+				minImpulse2, maxImpulse2);
+			Dy::computeMinMaxImpulseOrForceAsImpulse(
+				con3->minImpulse, con3->maxImpulse,		
+				con3->flags & Px1DConstraintFlag::eHAS_DRIVE_LIMIT, constraintDescs[3].driveLimitsAreForces, simDt,
+				minImpulse3, maxImpulse3);
+			const Vec4V minImpulse = V4LoadXYZW(minImpulse0, minImpulse1, minImpulse2, minImpulse3);
+			const Vec4V maxImpulse = V4LoadXYZW(maxImpulse0, maxImpulse1, maxImpulse2, maxImpulse3);
+			
+			Vec4V clin00 = V4LoadA(&con0->linear0.x);
+			Vec4V clin01 = V4LoadA(&con1->linear0.x);
+			Vec4V clin02 = V4LoadA(&con2->linear0.x);
+			Vec4V clin03 = V4LoadA(&con3->linear0.x);
+
+			Vec4V cang00 = V4LoadA(&con0->angular0.x);
+			Vec4V cang01 = V4LoadA(&con1->angular0.x);
+			Vec4V cang02 = V4LoadA(&con2->angular0.x);
+			Vec4V cang03 = V4LoadA(&con3->angular0.x);
+
+			Vec4V clin0X, clin0Y, clin0Z;
+			Vec4V cang0X, cang0Y, cang0Z;
+			
+			PX_TRANSPOSE_44_34(clin00, clin01, clin02, clin03, clin0X, clin0Y, clin0Z);
+			PX_TRANSPOSE_44_34(cang00, cang01, cang02, cang03, cang0X, cang0Y, cang0Z);
+			
+			Vec4V angDelta0X, angDelta0Y, angDelta0Z;
+
+			PX_TRANSPOSE_44_34(cangDelta00, cangDelta01, cangDelta02, cangDelta03, angDelta0X, angDelta0Y, angDelta0Z);
+
+			c->flags[0] = 0;
+			c->flags[1] = 0;
+			c->flags[2] = 0;
+			c->flags[3] = 0;
+
+			c->lin0X = clin0X;
+			c->lin0Y = clin0Y;
+			c->lin0Z = clin0Z;
+			c->ang0X = angDelta0X;
+			c->ang0Y = angDelta0Y;
+			c->ang0Z = angDelta0Z;
+			c->ang0WritebackX = cang0X;
+			c->ang0WritebackY = cang0Y;
+			c->ang0WritebackZ = cang0Z;
+
+			c->minImpulse = minImpulse;
+			c->maxImpulse = maxImpulse;
+			c->appliedForce = zero;
+			if (residualReportingEnabled) 
+			{
+				SolverConstraint1DDynamic4WithResidual* cc = static_cast<SolverConstraint1DDynamic4WithResidual*>(c);
+				cc->residualPosIter = zero;
+				cc->residualVelIter = zero;
+			}
+
+			const Vec4V lin0MagSq = V4MulAdd(clin0Z, clin0Z, V4MulAdd(clin0Y, clin0Y, V4Mul(clin0X, clin0X)));
+			const Vec4V cang0DotAngDelta = V4MulAdd(angDelta0Z, angDelta0Z, V4MulAdd(angDelta0Y, angDelta0Y, V4Mul(angDelta0X, angDelta0X)));
+			c->flags[0] = 0;
+			c->flags[1] = 0;
+			c->flags[2] = 0;
+			c->flags[3] = 0;
+
+			Vec4V unitResponse = V4MulAdd(lin0MagSq, invMass0, V4Mul(cang0DotAngDelta, invInertiaScale0));
+
+			Vec4V clin10 = V4LoadA(&con0->linear1.x);
+			Vec4V clin11 = V4LoadA(&con1->linear1.x);
+			Vec4V clin12 = V4LoadA(&con2->linear1.x);
+			Vec4V clin13 = V4LoadA(&con3->linear1.x);
+
+			Vec4V cang10 = V4LoadA(&con0->angular1.x);
+			Vec4V cang11 = V4LoadA(&con1->angular1.x);
+			Vec4V cang12 = V4LoadA(&con2->angular1.x);
+			Vec4V cang13 = V4LoadA(&con3->angular1.x);
+
+			Vec4V clin1X, clin1Y, clin1Z;
+			Vec4V cang1X, cang1Y, cang1Z;
+			PX_TRANSPOSE_44_34(clin10, clin11, clin12, clin13, clin1X, clin1Y, clin1Z);
+			PX_TRANSPOSE_44_34(cang10, cang11, cang12, cang13, cang1X, cang1Y, cang1Z);
+
+			Vec4V angDelta1X, angDelta1Y, angDelta1Z;
+
+			PX_TRANSPOSE_44_34(cangDelta10, cangDelta11, cangDelta12, cangDelta13, angDelta1X, angDelta1Y, angDelta1Z);
+
+			const Vec4V lin1MagSq = V4MulAdd(clin1Z, clin1Z, V4MulAdd(clin1Y, clin1Y, V4Mul(clin1X, clin1X)));
+			const Vec4V cang1DotAngDelta = V4MulAdd(angDelta1Z, angDelta1Z, V4MulAdd(angDelta1Y, angDelta1Y, V4Mul(angDelta1X, angDelta1X)));
+
+			c->lin1X = clin1X;
+			c->lin1Y = clin1Y;
+			c->lin1Z = clin1Z;
+
+			c->ang1X = angDelta1X;
+			c->ang1Y = angDelta1Y;
+			c->ang1Z = angDelta1Z;
+
+			unitResponse = V4Add(unitResponse, V4MulAdd(lin1MagSq, invMass1, V4Mul(cang1DotAngDelta, invInertiaScale1)));
+
+			Vec4V linProj0(V4Mul(clin0X, linVel0T0));
+			Vec4V linProj1(V4Mul(clin1X, linVel1T0));
+			Vec4V angProj0(V4Mul(cang0X, angVel0T0));
+			Vec4V angProj1(V4Mul(cang1X, angVel1T0));
+
+			linProj0 = V4MulAdd(clin0Y, linVel0T1, linProj0);
+			linProj1 = V4MulAdd(clin1Y, linVel1T1, linProj1);
+			angProj0 = V4MulAdd(cang0Y, angVel0T1, angProj0);
+			angProj1 = V4MulAdd(cang1Y, angVel1T1, angProj1);
+			
+			linProj0 = V4MulAdd(clin0Z, linVel0T2, linProj0);
+			linProj1 = V4MulAdd(clin1Z, linVel1T2, linProj1);
+			angProj0 = V4MulAdd(cang0Z, angVel0T2, angProj0);
+			angProj1 = V4MulAdd(cang1Z, angVel1T2, angProj1);
+
+			const Vec4V projectVel0 = V4Add(linProj0, angProj0);
+			const Vec4V projectVel1 = V4Add(linProj1, angProj1);
+			
+			const Vec4V normalVel = V4Sub(projectVel0, projectVel1);
+
+			{
+				//const inputs.
+				const Px1DConstraint* constraints[4] ={con0, con1, con2, con3};
+				const PxReal* unitResponses4 = reinterpret_cast<const PxReal*>(&unitResponse);
+				const PxReal* initJointSpeeds4 = reinterpret_cast<const PxReal*>(&normalVel);
+
+				//outputs
+				PxReal* biasedConstants4 = reinterpret_cast<PxReal*>(&c->constant);
+				PxReal* unbiasedConstants4 = reinterpret_cast<PxReal*>(&c->unbiasedConstant);
+				PxReal* velMultipliers4 = reinterpret_cast<PxReal*>(&c->velMultiplier);
+				PxReal* impulseMultipliers4 = reinterpret_cast<PxReal*>(&c->impulseMultiplier);
+
+				for(PxU32 i = 0; i < 4; i++)
+				{
+					if(a < constraintDescs[i].numRows)
+					{
+						const PxReal minRowResponseI = constraintDescs[i].minResponseThreshold;
+						const PxU16 constraintFlagsI = constraints[i]->flags;
+						const PxReal stiffnessI = constraints[i]->mods.spring.stiffness;
+						const PxReal dampingI = constraints[i]->mods.spring.damping;
+						const PxReal restitutionI = constraints[i]->mods.bounce.restitution;
+						const PxReal bounceVelocityThresholdI = constraints[i]->mods.bounce.velocityThreshold;
+						const PxReal geometricErrorI = constraints[i]->geometricError;
+						const PxReal velocityTargetI = constraints[i]->velocityTarget;
+						const PxReal jointSpeedForRestitutionBounceI = initJointSpeeds4[i];
+						const PxReal initJointSpeedI = initJointSpeeds4[i];
+						const PxReal unitResponseI = unitResponses4[i];
+						const PxReal erpI = erp[i];
+	
+						const PxReal recipResponseI = computeRecipUnitResponse(unitResponseI, minRowResponseI);
+
+						const Constraint1dSolverConstantsPGS solverConstants = 
+							 Dy::compute1dConstraintSolverConstantsPGS(
+								constraintFlagsI, 
+								stiffnessI, dampingI, 
+								restitutionI, bounceVelocityThresholdI,
+								geometricErrorI, velocityTargetI,
+								jointSpeedForRestitutionBounceI, initJointSpeedI,
+								unitResponseI, recipResponseI, 
+								erpI, 
+								simDt, recipSimDt);					
+
+						biasedConstants4[i] = solverConstants.constant;
+						unbiasedConstants4[i] = solverConstants.unbiasedConstant;
+						velMultipliers4[i] = solverConstants.velMultiplier;
+						impulseMultipliers4[i] = solverConstants.impulseMultiplier;
+					}
+					else
+					{
+						biasedConstants4[i] = 0;
+						unbiasedConstants4[i] = 0;
+						velMultipliers4[i] = 0;
+						impulseMultipliers4[i] = 0;
+					}
+				}
+			}
+
+			if(con0->flags & Px1DConstraintFlag::eOUTPUT_FORCE)
+				c->flags[0] |= DY_SC_FLAG_OUTPUT_FORCE;
+			if(con1->flags & Px1DConstraintFlag::eOUTPUT_FORCE)
+				c->flags[1] |= DY_SC_FLAG_OUTPUT_FORCE;
+			if(con2->flags & Px1DConstraintFlag::eOUTPUT_FORCE)
+				c->flags[2] |= DY_SC_FLAG_OUTPUT_FORCE;
+			if(con3->flags & Px1DConstraintFlag::eOUTPUT_FORCE)
+				c->flags[3] |= DY_SC_FLAG_OUTPUT_FORCE;
+		}
+		*(reinterpret_cast<PxU32*>(currPtr)) = 0;
+		*(reinterpret_cast<PxU32*>(currPtr + 4)) = 0;
+	}
+	
+	//OK, we're ready to allocate and solve prep these constraints now :-)
+	return SolverConstraintPrepState::eSUCCESS;
+}
+
+}
+
+}
--- a/engine/third_party/physx/source/lowleveldynamics/src/DyContactPrep.cpp
+++ b/engine/third_party/physx/source/lowleveldynamics/src/DyContactPrep.cpp
@@ -0,0 +1,831 @@
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+//  * Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//  * Redistributions in binary form must reproduce the above copyright
+//    notice, this list of conditions and the following disclaimer in the
+//    documentation and/or other materials provided with the distribution.
+//  * Neither the name of NVIDIA CORPORATION nor the names of its
+//    contributors may be used to endorse or promote products derived
+//    from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.  
+     
+#include "foundation/PxPreprocessor.h"
+#include "foundation/PxVecMath.h"
+#include "DyThreadContext.h"
+#include "PxcNpContactPrepShared.h"
+#include "DyConstraintPrep.h"
+#include "DyAllocator.h"
+
+using namespace physx;
+
+#include "DyContactPrepShared.h"
+
+#include "DySolverConstraint1DStep.h"
+
+using namespace aos;
+
+namespace physx
+{
+namespace Dy
+{
+
+static void setupFinalizeSolverConstraints(
+							const PxSolverContactDesc& contactDesc,
+							const CorrelationBuffer& c,
+							PxU8* workspace,
+							const PxSolverBodyData& data0,
+							const PxSolverBodyData& data1,
+							PxReal invDtF32, PxReal dtF32, PxReal bounceThresholdF32,
+							bool hasForceThreshold, bool staticOrKinematicBody,
+							PxU8* frictionDataPtr
+							)
+{
+	// NOTE II: the friction patches are sparse (some of them have no contact patches, and
+	// therefore did not get written back to the cache) but the patch addresses are dense,
+	// corresponding to valid patches
+
+	const Vec3V solverOffsetSlop = V3Load(contactDesc.offsetSlop);
+
+	const FloatV ccdMaxSeparation = FLoad(contactDesc.maxCCDSeparation);
+
+	const PxU8 flags = PxU8(hasForceThreshold ? SolverContactHeader::eHAS_FORCE_THRESHOLDS : 0);
+
+	const PxU8 type = PxTo8(staticOrKinematicBody ? DY_SC_TYPE_STATIC_CONTACT : DY_SC_TYPE_RB_CONTACT);
+
+	const FloatV zero = FZero();
+
+	const FloatV d0 = FLoad(contactDesc.invMassScales.linear0);
+	const FloatV d1 = FLoad(contactDesc.invMassScales.linear1);
+	const FloatV angD0 = FLoad(contactDesc.invMassScales.angular0);
+	const FloatV angD1 = FLoad(contactDesc.invMassScales.angular1);
+
+	const FloatV nDom1fV = FNeg(d1);
+
+	const FloatV invMass0 = FLoad(data0.invMass);
+	const FloatV invMass1 = FLoad(data1.invMass);
+
+	const FloatV invMass0_dom0fV = FMul(d0, invMass0);
+	const FloatV invMass1_dom1fV = FMul(nDom1fV, invMass1);
+
+	Vec4V staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W = V4Zero();
+	staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W = V4SetZ(staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W, invMass0_dom0fV);
+	staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W = V4SetW(staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W, invMass1_dom1fV);
+
+	const FloatV restDistance = FLoad(contactDesc.restDistance);
+
+	const FloatV maxPenBias = FMax(FLoad(data0.penBiasClamp), FLoad(data1.penBiasClamp));
+
+	const QuatV bodyFrame0q = QuatVLoadU(&contactDesc.bodyFrame0.q.x);
+	const Vec3V bodyFrame0p = V3LoadU_SafeReadW(contactDesc.bodyFrame0.p);	// PT: see compile-time-assert in PxSolverConstraintPrepDescBase
+	const QuatV bodyFrame1q = QuatVLoadU(&contactDesc.bodyFrame1.q.x);
+	const Vec3V bodyFrame1p = V3LoadU_SafeReadW(contactDesc.bodyFrame1.p);	// PT: see compile-time-assert in PxSolverConstraintPrepDescBase
+
+	PxU32 frictionPatchWritebackAddrIndex = 0;
+
+	PxPrefetchLine(c.contactID);
+	PxPrefetchLine(c.contactID, 128);
+
+	const Vec3V linVel0 = V3LoadU_SafeReadW(data0.linearVelocity);	// PT: safe because 'invMass' follows 'linearVelocity' in PxSolverBodyData
+	const Vec3V linVel1 = V3LoadU_SafeReadW(data1.linearVelocity);	// PT: safe because 'invMass' follows 'linearVelocity' in PxSolverBodyData
+	const Vec3V angVel0 = V3LoadU_SafeReadW(data0.angularVelocity);	// PT: safe because 'reportThreshold' follows 'angularVelocity' in PxSolverBodyData
+	const Vec3V angVel1 = V3LoadU_SafeReadW(data1.angularVelocity);	// PT: safe because 'reportThreshold' follows 'angularVelocity' in PxSolverBodyData
+
+	PX_ALIGN(16, const Mat33V invSqrtInertia0)
+	(
+		V3LoadU_SafeReadW(data0.sqrtInvInertia.column0),	// PT: safe because 'column1' follows 'column0' in PxMat33
+		V3LoadU_SafeReadW(data0.sqrtInvInertia.column1),	// PT: safe because 'column2' follows 'column1' in PxMat33
+		V3LoadU_SafeReadW(data0.sqrtInvInertia.column2)		// PT: safe because sqrtInvInertia is not the last member of PxSolverBodyData, see compile-time-assert in PxSolverBodyData
+	);
+	
+	PX_ALIGN(16, const Mat33V invSqrtInertia1)
+	(
+		V3LoadU_SafeReadW(data1.sqrtInvInertia.column0),	// PT: safe because 'column1' follows 'column0' in PxMat33
+		V3LoadU_SafeReadW(data1.sqrtInvInertia.column1),	// PT: safe because 'column2' follows 'column1' in PxMat33
+		V3LoadU_SafeReadW(data1.sqrtInvInertia.column2)		// PT: safe because sqrtInvInertia is not the last member of PxSolverBodyData, see compile-time-assert in PxSolverBodyData
+	);
+
+	const FloatV invDt = FLoad(invDtF32);
+	const FloatV p8 = FLoad(0.8f);
+	const FloatV bounceThreshold = FLoad(bounceThresholdF32);
+
+	const FloatV invDtp8 = FMul(invDt, p8);
+	const FloatV dt = FLoad(dtF32);
+
+	const PxContactPoint* PX_RESTRICT buffer = contactDesc.contacts;
+	PxU8* PX_RESTRICT ptr = workspace;
+	for(PxU32 i=0;i<c.frictionPatchCount;i++)
+	{
+		const PxU32 contactCount = c.frictionPatchContactCounts[i];
+		if(contactCount == 0)
+			continue;
+
+		const FrictionPatch& frictionPatch = c.frictionPatches[i];
+		PX_ASSERT(frictionPatch.anchorCount <= 2);
+
+		const PxU32 firstPatch = c.correlationListHeads[i];
+		const PxContactPoint* contactBase0 = buffer + c.contactPatches[firstPatch].start;
+
+		SolverContactHeader* PX_RESTRICT header = reinterpret_cast<SolverContactHeader*>(ptr);
+		ptr += sizeof(SolverContactHeader);		
+
+		PxPrefetchLine(ptr, 128);
+		PxPrefetchLine(ptr, 256);
+
+		header->shapeInteraction = getInteraction(contactDesc);
+		header->flags = flags;
+		FStore(invMass0_dom0fV, &header->invMass0);
+		FStore(FNeg(invMass1_dom1fV), &header->invMass1);
+		const FloatV restitution = FLoad(contactBase0->restitution);
+		const BoolV accelerationSpring = BLoad(!!(contactBase0->materialFlags & PxMaterialFlag::eCOMPLIANT_ACCELERATION_SPRING));
+
+		const FloatV damping = FLoad(contactBase0->damping);
+		const PxU32 pointStride = sizeof(SolverContactPoint);
+		const PxU32 frictionStride = sizeof(SolverContactFriction);
+
+		const Vec3V normal = V3LoadA(buffer[c.contactPatches[c.correlationListHeads[i]].start].normal);
+		const FloatV normalLenSq = V3LengthSq(normal);
+		const VecCrossV norCross = V3PrepareCross(normal);
+		const FloatV norVel = V3SumElems(V3NegMulSub(normal, linVel1, V3Mul(normal, linVel0)));
+
+		const FloatV invMassNorLenSq0 = FMul(invMass0_dom0fV, normalLenSq);
+		const FloatV invMassNorLenSq1 = FMul(invMass1_dom1fV, normalLenSq);
+
+		header->normal_minAppliedImpulseForFrictionW = Vec4V_From_Vec3V(normal);
+		
+		for(PxU32 patch=c.correlationListHeads[i]; 
+			patch!=CorrelationBuffer::LIST_END; 
+			patch = c.contactPatches[patch].next)
+		{
+			const PxU32 count = c.contactPatches[patch].count;
+			const PxContactPoint* contactBase = buffer + c.contactPatches[patch].start;
+				
+			PxU8* p = ptr;
+			
+			for(PxU32 j=0;j<count;j++)
+			{
+				PxPrefetchLine(p, 256);
+				const PxContactPoint& contact = contactBase[j];
+
+				SolverContactPoint* PX_RESTRICT solverContact = reinterpret_cast<SolverContactPoint*>(p);
+				p += pointStride;
+
+				constructContactConstraint(invSqrtInertia0, invSqrtInertia1, invMassNorLenSq0, 
+					invMassNorLenSq1, angD0, angD1, bodyFrame0p, bodyFrame1p,
+					normal, norVel, norCross, angVel0, angVel1,
+					invDt, invDtp8, dt, restDistance, maxPenBias, restitution,
+					bounceThreshold, contact, *solverContact,
+					ccdMaxSeparation, solverOffsetSlop, damping, accelerationSpring);
+			}
+
+			ptr = p;
+		}
+
+		PxF32* forceBuffers = reinterpret_cast<PxF32*>(ptr);
+		PxMemZero(forceBuffers, sizeof(PxF32) * contactCount);
+		ptr += ((contactCount + 3) & (~3)) * sizeof(PxF32); // jump to next 16-byte boundary
+		
+		const PxReal frictionCoefficient = (frictionPatch.anchorCount == 2) ? 0.5f : 1.f;
+
+		const PxReal staticFriction = contactBase0->staticFriction * frictionCoefficient;
+		const PxReal dynamicFriction = contactBase0->dynamicFriction* frictionCoefficient;
+
+		const bool disableStrongFriction = !!(contactBase0->materialFlags & PxMaterialFlag::eDISABLE_FRICTION);
+		staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W=V4SetX(staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W, FLoad(staticFriction));
+		staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W=V4SetY(staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W, FLoad(dynamicFriction));
+
+		const bool haveFriction = (disableStrongFriction == 0 && frictionPatch.anchorCount != 0);//PX_IR(n.staticFriction) > 0 || PX_IR(n.dynamicFriction) > 0;
+		header->numNormalConstr		= PxTo8(contactCount);
+		header->numFrictionConstr	= PxTo8(haveFriction ? frictionPatch.anchorCount*2 : 0);
+	
+		header->type				= type;
+
+		header->staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W = staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W;
+		FStore(angD0, &header->angDom0);
+		FStore(angD1, &header->angDom1);
+
+		header->broken = 0;
+
+		if(haveFriction)
+		{
+			const Vec3V linVrel = V3Sub(linVel0, linVel1);
+			//const Vec3V normal = Vec3V_From_PxVec3_Aligned(buffer.contacts[c.contactPatches[c.correlationListHeads[i]].start].normal);
+
+			const FloatV orthoThreshold = FLoad(0.70710678f);
+			const FloatV p1 = FLoad(0.0001f);
+			// fallback: normal.cross((1,0,0)) or normal.cross((0,0,1))
+			const FloatV normalX = V3GetX(normal);
+			const FloatV normalY = V3GetY(normal);
+			const FloatV normalZ = V3GetZ(normal);
+			
+			const Vec3V t0Fallback1 = V3Merge(zero, FNeg(normalZ), normalY);
+			const Vec3V t0Fallback2 = V3Merge(FNeg(normalY), normalX, zero);
+			const Vec3V t0Fallback = V3Sel(FIsGrtr(orthoThreshold, FAbs(normalX)), t0Fallback1, t0Fallback2);
+
+			Vec3V t0 = V3Sub(linVrel, V3Scale(normal, V3Dot(normal, linVrel)));
+			t0 = V3Sel(FIsGrtr(V3LengthSq(t0), p1), t0, t0Fallback);
+			t0 = V3Normalize(t0);
+
+			const VecCrossV t0Cross = V3PrepareCross(t0);
+
+			const Vec3V t1 = V3Cross(norCross, t0Cross);
+			const VecCrossV t1Cross = V3PrepareCross(t1);
+		
+			// since we don't even have the body velocities we can't compute the tangent dirs, so 
+			// the only thing we can do right now is to write the geometric information (which is the
+			// same for both axis constraints of an anchor) We put ra in the raXn field, rb in the rbXn
+			// field, and the error in the normal field. See corresponding comments in
+			// completeContactFriction()
+
+			//We want to set the writeBack ptr to point to the broken flag of the friction patch.
+			//On spu we have a slight problem here because the friction patch array is 
+			//in local store rather than in main memory. The good news is that the address of the friction 
+			//patch array in main memory is stored in the work unit. These two addresses will be equal 
+			//except on spu where one is local store memory and the other is the effective address in main memory.
+			//Using the value stored in the work unit guarantees that the main memory address is used on all platforms.
+			PxU8* PX_RESTRICT writeback = frictionDataPtr + frictionPatchWritebackAddrIndex*sizeof(FrictionPatch);
+
+			header->frictionBrokenWritebackByte = writeback;
+
+			const Vec3V v3Zero = V3Zero();
+			for(PxU32 j = 0; j < frictionPatch.anchorCount; j++)
+			{
+				PxPrefetchLine(ptr, 256);
+				PxPrefetchLine(ptr, 384);
+				SolverContactFriction* PX_RESTRICT f0 = reinterpret_cast<SolverContactFriction*>(ptr);
+				ptr += frictionStride;
+				SolverContactFriction* PX_RESTRICT f1 = reinterpret_cast<SolverContactFriction*>(ptr);
+				ptr += frictionStride;
+
+				const Vec3V body0Anchor = V3LoadU_SafeReadW(frictionPatch.body0Anchors[j]);	// PT: see compile-time-assert in FrictionPatch
+				const Vec3V body1Anchor = V3LoadU_SafeReadW(frictionPatch.body1Anchors[j]);	// PT: see compile-time-assert in FrictionPatch
+
+				const Vec3V ra = QuatRotate(bodyFrame0q, body0Anchor);
+				const Vec3V rb = QuatRotate(bodyFrame1q, body1Anchor);
+
+				/*ra = V3Sel(V3IsGrtr(solverOffsetSlop, V3Abs(ra)), v3Zero, ra);
+				rb = V3Sel(V3IsGrtr(solverOffsetSlop, V3Abs(rb)), v3Zero, rb);*/
+
+				PxU32 index = c.contactID[i][j];
+				Vec3V error = V3Sub(V3Add(ra, bodyFrame0p), V3Add(rb, bodyFrame1p));
+				error = V3Sel(V3IsGrtr(solverOffsetSlop, V3Abs(error)), v3Zero, error);
+
+				index = index == 0xFFFF ? c.contactPatches[c.correlationListHeads[i]].start : index;
+
+				const Vec3V tvel = V3LoadA(buffer[index].targetVel);
+				
+				{
+					Vec3V raXn = V3Cross(ra, t0Cross);
+					Vec3V rbXn = V3Cross(rb, t0Cross);
+
+					raXn = V3Sel(V3IsGrtr(solverOffsetSlop, V3Abs(raXn)), V3Zero(), raXn);
+					rbXn = V3Sel(V3IsGrtr(solverOffsetSlop, V3Abs(rbXn)), V3Zero(), rbXn);
+
+					const Vec3V raXnSqrtInertia = M33MulV3(invSqrtInertia0, raXn);
+					const Vec3V rbXnSqrtInertia = M33MulV3(invSqrtInertia1, rbXn);	
+
+					const FloatV resp0 = FAdd(invMass0_dom0fV, FMul(angD0, V3Dot(raXnSqrtInertia, raXnSqrtInertia)));
+					const FloatV resp1 = FSub(FMul(angD1, V3Dot(rbXnSqrtInertia, rbXnSqrtInertia)), invMass1_dom1fV);
+					const FloatV resp = FAdd(resp0, resp1);
+
+					const FloatV velMultiplier = FSel(FIsGrtr(resp, zero), FDiv(p8, resp), zero);
+
+					FloatV targetVel = V3Dot(tvel, t0);
+
+					const FloatV vrel1 = FAdd(V3Dot(t0, linVel0), V3Dot(raXn, angVel0));
+					const FloatV vrel2 = FAdd(V3Dot(t0, linVel1), V3Dot(rbXn, angVel1));
+					const FloatV vrel = FSub(vrel1, vrel2);
+
+					targetVel = FSub(targetVel, vrel);
+
+					f0->normalXYZ_appliedForceW = V4ClearW(Vec4V_From_Vec3V(t0));
+					f0->raXnXYZ_velMultiplierW = V4SetW(raXnSqrtInertia, velMultiplier);
+					f0->rbXnXYZ_biasW = V4SetW(rbXnSqrtInertia, FMul(V3Dot(t0, error), invDt));
+					FStore(targetVel, &f0->targetVel);
+				}
+
+				{
+					Vec3V raXn = V3Cross(ra, t1Cross);
+					Vec3V rbXn = V3Cross(rb, t1Cross);
+
+					raXn = V3Sel(V3IsGrtr(solverOffsetSlop, V3Abs(raXn)), V3Zero(), raXn);
+					rbXn = V3Sel(V3IsGrtr(solverOffsetSlop, V3Abs(rbXn)), V3Zero(), rbXn);
+
+					const Vec3V raXnSqrtInertia = M33MulV3(invSqrtInertia0, raXn);
+					const Vec3V rbXnSqrtInertia = M33MulV3(invSqrtInertia1, rbXn);	
+
+					const FloatV resp0 = FAdd(invMass0_dom0fV, FMul(angD0, V3Dot(raXnSqrtInertia, raXnSqrtInertia)));
+					const FloatV resp1 = FSub(FMul(angD1, V3Dot(rbXnSqrtInertia, rbXnSqrtInertia)), invMass1_dom1fV);
+					const FloatV resp = FAdd(resp0, resp1);
+
+					const FloatV velMultiplier = FSel(FIsGrtr(resp, zero), FDiv(p8, resp), zero);
+
+					FloatV targetVel = V3Dot(tvel, t1);
+
+					const FloatV vrel1 = FAdd(V3Dot(t1, linVel0), V3Dot(raXn, angVel0));
+					const FloatV vrel2 = FAdd(V3Dot(t1, linVel1), V3Dot(rbXn, angVel1));
+					const FloatV vrel = FSub(vrel1, vrel2);
+
+					targetVel = FSub(targetVel, vrel);
+
+					f1->normalXYZ_appliedForceW = V4ClearW(Vec4V_From_Vec3V(t1));
+					f1->raXnXYZ_velMultiplierW = V4SetW(raXnSqrtInertia, velMultiplier);
+					f1->rbXnXYZ_biasW = V4SetW(rbXnSqrtInertia, FMul(V3Dot(t1, error), invDt));
+					FStore(targetVel, &f1->targetVel);
+				}
+			}
+		}
+
+		frictionPatchWritebackAddrIndex++;
+	}
+}
+
+PX_FORCE_INLINE void computeBlockStreamByteSizes(const bool useExtContacts, const CorrelationBuffer& c,
+								PxU32& _solverConstraintByteSize, PxU32& _frictionPatchByteSize, PxU32& _numFrictionPatches,
+								PxU32& _axisConstraintCount)
+{
+	PX_ASSERT(0 == _solverConstraintByteSize);
+	PX_ASSERT(0 == _frictionPatchByteSize);
+	PX_ASSERT(0 == _numFrictionPatches);
+	PX_ASSERT(0 == _axisConstraintCount);
+
+	// PT: use local vars to remove LHS
+	PxU32 solverConstraintByteSize = 0;
+	PxU32 numFrictionPatches = 0;
+	PxU32 axisConstraintCount = 0;
+
+	for(PxU32 i = 0; i < c.frictionPatchCount; i++)
+	{
+		//Friction patches.
+		if(c.correlationListHeads[i] != CorrelationBuffer::LIST_END)
+			numFrictionPatches++;
+
+		const FrictionPatch& frictionPatch = c.frictionPatches[i];
+
+		const bool haveFriction = (frictionPatch.materialFlags & PxMaterialFlag::eDISABLE_FRICTION) == 0;
+
+		//Solver constraint data.
+		if(c.frictionPatchContactCounts[i]!=0)
+		{
+			solverConstraintByteSize += sizeof(SolverContactHeader);
+			solverConstraintByteSize += useExtContacts ? c.frictionPatchContactCounts[i] * sizeof(SolverContactPointExt) 
+				: c.frictionPatchContactCounts[i] * sizeof(SolverContactPoint);
+			solverConstraintByteSize += sizeof(PxF32) * ((c.frictionPatchContactCounts[i] + 3)&(~3)); //Add on space for applied impulses
+
+			axisConstraintCount += c.frictionPatchContactCounts[i];
+
+			if(haveFriction)
+			{
+				solverConstraintByteSize += useExtContacts ? c.frictionPatches[i].anchorCount * 2 * sizeof(SolverContactFrictionExt)
+					: c.frictionPatches[i].anchorCount * 2 * sizeof(SolverContactFriction);
+				axisConstraintCount += c.frictionPatches[i].anchorCount * 2;
+			}
+		}
+	}
+	PxU32 frictionPatchByteSize = numFrictionPatches*sizeof(FrictionPatch);
+
+	_numFrictionPatches = numFrictionPatches;
+	_axisConstraintCount = axisConstraintCount;
+
+	//16-byte alignment.
+	_frictionPatchByteSize = ((frictionPatchByteSize + 0x0f) & ~0x0f);
+	_solverConstraintByteSize = ((solverConstraintByteSize + 0x0f) & ~0x0f);
+	PX_ASSERT(0 == (_solverConstraintByteSize & 0x0f));
+	PX_ASSERT(0 == (_frictionPatchByteSize & 0x0f));
+}
+
+static bool reserveBlockStreams(const bool useExtContacts, Dy::CorrelationBuffer& cBuffer,
+						PxU8*& solverConstraint,
+						FrictionPatch*& _frictionPatches,
+						PxU32& numFrictionPatches, PxU32& solverConstraintByteSize,
+						PxU32& axisConstraintCount, PxConstraintAllocator& constraintAllocator)
+{
+	PX_ASSERT(NULL == solverConstraint);
+	PX_ASSERT(NULL == _frictionPatches);
+	PX_ASSERT(0 == numFrictionPatches);
+	PX_ASSERT(0 == solverConstraintByteSize);
+	PX_ASSERT(0 == axisConstraintCount);
+
+	//From frictionPatchStream we just need to reserve a single buffer.
+	PxU32 frictionPatchByteSize = 0;
+	//Compute the sizes of all the buffers.
+	computeBlockStreamByteSizes(
+		useExtContacts, cBuffer,
+		solverConstraintByteSize, frictionPatchByteSize, numFrictionPatches,
+		axisConstraintCount);
+
+	//Reserve the buffers.
+
+	//First reserve the accumulated buffer size for the constraint block.
+	PxU8* constraintBlock = NULL;
+	const PxU32 constraintBlockByteSize = solverConstraintByteSize;
+	if(constraintBlockByteSize > 0)
+	{
+		constraintBlock = constraintAllocator.reserveConstraintData(constraintBlockByteSize + 16u);
+		if(!checkConstraintDataPtr<false>(constraintBlock))
+			constraintBlock = NULL;
+
+		PX_ASSERT((size_t(constraintBlock) & 0xF) == 0);
+	}
+
+	FrictionPatch* frictionPatches = NULL;
+	//If the constraint block reservation didn't fail then reserve the friction buffer too.
+	if(frictionPatchByteSize >0 && (0==constraintBlockByteSize || constraintBlock))
+	{
+		frictionPatches = reinterpret_cast<FrictionPatch*>(constraintAllocator.reserveFrictionData(frictionPatchByteSize));
+		if(!checkFrictionDataPtr(frictionPatches))
+			frictionPatches = NULL;
+	}
+
+	_frictionPatches = frictionPatches;
+
+	//Patch up the individual ptrs to the buffer returned by the constraint block reservation (assuming the reservation didn't fail).
+	if(0==constraintBlockByteSize || constraintBlock)
+	{
+		if(solverConstraintByteSize)
+		{
+			solverConstraint = constraintBlock;
+			PX_ASSERT(0==(uintptr_t(solverConstraint) & 0x0f));
+		}
+	}
+
+	//Return true if neither of the two block reservations failed.
+	return ((0==constraintBlockByteSize || constraintBlock) && (0==frictionPatchByteSize || frictionPatches));
+}
+
+bool createFinalizeSolverContacts(
+	PxSolverContactDesc& contactDesc,
+	CorrelationBuffer& c,
+	const PxReal invDtF32,
+	const PxReal dtF32,
+	PxReal bounceThresholdF32,
+	PxReal frictionOffsetThreshold,
+	PxReal correlationDistance,
+	PxConstraintAllocator& constraintAllocator,
+	Cm::SpatialVectorF* Z)
+{
+	PxPrefetchLine(contactDesc.body0);
+	PxPrefetchLine(contactDesc.body1);
+	PxPrefetchLine(contactDesc.data0);
+	PxPrefetchLine(contactDesc.data1);
+
+	c.frictionPatchCount = 0;
+	c.contactPatchCount = 0;
+
+	const bool hasForceThreshold = contactDesc.hasForceThresholds;
+	const bool staticOrKinematicBody = contactDesc.bodyState1 == PxSolverContactDesc::eKINEMATIC_BODY || contactDesc.bodyState1 == PxSolverContactDesc::eSTATIC_BODY;
+
+	const bool disableStrongFriction = contactDesc.disableStrongFriction;
+
+	PxSolverConstraintDesc& desc = *contactDesc.desc;
+
+	const bool useExtContacts = (desc.linkIndexA != PxSolverConstraintDesc::RIGID_BODY || desc.linkIndexB != PxSolverConstraintDesc::RIGID_BODY);
+
+	desc.constraint = NULL;
+	desc.constraintLengthOver16 = 0; // from here onwards we use this field for the constraint size, not the constraint type anymore
+
+	if (contactDesc.numContacts == 0)
+	{
+		contactDesc.frictionPtr = NULL;
+		contactDesc.frictionCount = 0;
+		return true;
+	}
+
+	if (!disableStrongFriction)
+	{
+		getFrictionPatches(c, contactDesc.frictionPtr, contactDesc.frictionCount, contactDesc.bodyFrame0, contactDesc.bodyFrame1, correlationDistance);
+	}
+
+	bool overflow = !createContactPatches(c, contactDesc.contacts, contactDesc.numContacts, PXC_SAME_NORMAL);
+	overflow = correlatePatches(c, contactDesc.contacts, contactDesc.bodyFrame0, contactDesc.bodyFrame1, PXC_SAME_NORMAL, 0, 0) || overflow;
+	PX_UNUSED(overflow);
+
+#if PX_CHECKED
+	if (overflow)
+		PxGetFoundation().error(physx::PxErrorCode::eDEBUG_WARNING, PX_FL, "Dropping contacts in solver because we exceeded limit of 32 friction patches.");
+#endif
+
+	growPatches(c, contactDesc.contacts, contactDesc.bodyFrame0, contactDesc.bodyFrame1, 0, frictionOffsetThreshold + contactDesc.restDistance);
+
+	//PX_ASSERT(patchCount == c.frictionPatchCount);
+
+	FrictionPatch* frictionPatches = NULL;
+	PxU8* solverConstraint = NULL;
+	PxU32 numFrictionPatches = 0;
+	PxU32 solverConstraintByteSize = 0;
+	PxU32 axisConstraintCount = 0;
+
+	const bool successfulReserve = reserveBlockStreams(
+		useExtContacts, c,
+		solverConstraint, frictionPatches,
+		numFrictionPatches,
+		solverConstraintByteSize,
+		axisConstraintCount,
+		constraintAllocator);
+	// initialise the work unit's ptrs to the various buffers.
+
+	// patch up the work unit with the reserved buffers and set the reserved buffer data as appropriate.
+	contactDesc.frictionPtr = NULL;
+	contactDesc.frictionCount = 0;
+
+	if (successfulReserve)
+	{
+		PxU8* frictionDataPtr = reinterpret_cast<PxU8*>(frictionPatches);
+		contactDesc.frictionPtr = frictionDataPtr;
+		desc.constraint = solverConstraint;
+		//output.nbContacts = PxTo8(numContacts);
+		contactDesc.frictionCount = PxTo8(numFrictionPatches);
+		PX_ASSERT(solverConstraintByteSize % 16 == 0);
+		desc.constraintLengthOver16 = PxTo16(solverConstraintByteSize / 16);
+		desc.writeBack = contactDesc.contactForces;
+
+		//Initialise friction buffer.
+		if (frictionPatches)
+		{
+			frictionPatches->prefetch();
+
+			for (PxU32 i = 0; i<c.frictionPatchCount; i++)
+			{
+				//if(c.correlationListHeads[i]!=CorrelationBuffer::LIST_END)
+				if (c.frictionPatchContactCounts[i])
+				{
+					*frictionPatches++ = c.frictionPatches[i];
+					PxPrefetchLine(frictionPatches, 256);
+				}
+			}
+		}
+
+		//Initialise solverConstraint buffer.
+		if (solverConstraint)
+		{
+			const PxSolverBodyData& data0 = *contactDesc.data0;
+			const PxSolverBodyData& data1 = *contactDesc.data1;
+			if (useExtContacts)
+			{
+				const SolverExtBody b0(reinterpret_cast<const void*>(contactDesc.body0), reinterpret_cast<const void*>(&data0), desc.linkIndexA);
+				const SolverExtBody b1(reinterpret_cast<const void*>(contactDesc.body1), reinterpret_cast<const void*>(&data1), desc.linkIndexB);
+
+				setupFinalizeExtSolverContacts(contactDesc.contacts, c, contactDesc.bodyFrame0, contactDesc.bodyFrame1, solverConstraint,
+					b0, b1, invDtF32, dtF32, bounceThresholdF32,
+					contactDesc.invMassScales.linear0, contactDesc.invMassScales.angular0, contactDesc.invMassScales.linear1, contactDesc.invMassScales.angular1, 
+					contactDesc.restDistance, frictionDataPtr, contactDesc.maxCCDSeparation, Z, contactDesc.offsetSlop);
+			}
+			else
+			{
+				setupFinalizeSolverConstraints(
+					contactDesc,
+					c,
+					solverConstraint,
+					data0, data1, invDtF32, dtF32, bounceThresholdF32,
+					hasForceThreshold, staticOrKinematicBody,
+					frictionDataPtr
+					);
+			}
+			//KS - set to 0 so we have a counter for the number of times we solved the constraint
+			//only going to be used on SPU but might as well set on all platforms because this code is shared
+			*(reinterpret_cast<PxU32*>(solverConstraint + solverConstraintByteSize)) = 0;
+		}
+	}
+
+	return successfulReserve;
+}
+
+FloatV setupExtSolverContact(const SolverExtBody& b0, const SolverExtBody& b1,
+	const FloatV& d0, const FloatV& d1, const FloatV& angD0, const FloatV& angD1, const Vec3V& bodyFrame0p, const Vec3V& bodyFrame1p,
+	const Vec3VArg normal, const FloatVArg invDt, const FloatVArg invDtp8, const FloatVArg dt, const FloatVArg restDistance, 
+	const FloatVArg maxPenBias, const FloatVArg restitution,const FloatVArg bounceThreshold, const PxContactPoint& contact, SolverContactPointExt& solverContact, const FloatVArg ccdMaxSeparation, Cm::SpatialVectorF* zVector,
+	const Cm::SpatialVectorV& v0, const Cm::SpatialVectorV& v1, const FloatV& cfm, const Vec3VArg solverOffsetSlop,
+	const FloatVArg norVel0, const FloatVArg norVel1, const FloatVArg damping, const BoolVArg accelerationSpring)
+{
+	const FloatV zero = FZero();
+	const FloatV separation = FLoad(contact.separation);
+
+	const FloatV penetration = FSub(separation, restDistance);
+
+	const Vec3V point = V3LoadA(contact.point);
+
+	const Vec3V ra = V3Sub(point, bodyFrame0p);
+	const Vec3V rb = V3Sub(point, bodyFrame1p);
+
+	Vec3V raXn = V3Cross(ra, normal);
+	Vec3V rbXn = V3Cross(rb, normal);
+
+	FloatV aVel0 = V3Dot(v0.angular, raXn);
+	FloatV aVel1 = V3Dot(v1.angular, raXn);
+
+	FloatV relLinVel = FSub(norVel0, norVel1);
+	FloatV relAngVel = FSub(aVel0, aVel1);
+	
+	const Vec3V slop = V3Scale(solverOffsetSlop, FMax(FSel(FIsEq(relLinVel, zero), FMax(), FDiv(relAngVel, relLinVel)), FOne()));
+
+	raXn = V3Sel(V3IsGrtr(slop, V3Abs(raXn)), V3Zero(), raXn);
+	rbXn = V3Sel(V3IsGrtr(slop, V3Abs(rbXn)), V3Zero(), rbXn);
+
+	aVel0 = V3Dot(raXn, v0.angular);
+	aVel1 = V3Dot(rbXn, v1.angular);
+
+	relAngVel = FSub(aVel0, aVel1);
+
+	Cm::SpatialVectorV deltaV0, deltaV1;
+
+	const Cm::SpatialVectorV resp0 = createImpulseResponseVector(normal, raXn, b0);
+	const Cm::SpatialVectorV resp1 = createImpulseResponseVector(V3Neg(normal), V3Neg(rbXn), b1);
+
+	const FloatV unitResponse = getImpulseResponse(b0, resp0, deltaV0, d0, angD0,
+		b1, resp1, deltaV1, d1, angD1, reinterpret_cast<Cm::SpatialVectorV*>(zVector));
+
+	const FloatV vrel = FAdd(relLinVel, relAngVel);
+
+	const FloatV penetrationInvDt = FMul(penetration, invDt);
+	const BoolV isSeparated = FIsGrtrOrEq(penetration, zero);
+
+	const BoolV collidingWithVrel = FIsGrtr(FNeg(vrel), penetrationInvDt); // true if (pen + dt*vrel) < 0
+	const BoolV isGreater2 = BAnd(BAnd(FIsGrtr(restitution, zero), FIsGrtr(bounceThreshold, vrel)), collidingWithVrel);
+
+	FloatV velMultiplier, impulseMultiplier;
+	FloatV biasedErr, unbiasedErr;
+
+	const FloatV tVel = FSel(isGreater2, FMul(FNeg(vrel), restitution), zero);
+	FloatV targetVelocity = tVel;
+	//Get the rigid body's current velocity and embed into the constraint target velocities
+	if (b0.mLinkIndex == PxSolverConstraintDesc::RIGID_BODY)
+		targetVelocity = FSub(targetVelocity, FAdd(norVel0, aVel0));
+	else if (b1.mLinkIndex == PxSolverConstraintDesc::RIGID_BODY)
+		targetVelocity = FAdd(targetVelocity, FAdd(norVel1, aVel1));
+
+	targetVelocity = FAdd(targetVelocity, V3Dot(V3LoadA(contact.targetVel), normal));
+
+	// jcarius: the addition of the cfm term is not present in equivalent code for rigid bodies
+	const FloatV recipResponse = FSel(FIsGrtr(unitResponse, zero), FRecip(FAdd(unitResponse, cfm)), zero);
+
+	if (FAllGrtr(zero, restitution))
+	{
+		computeCompliantContactCoefficients(dt, restitution, damping, recipResponse, unitResponse, penetration,
+		                                    targetVelocity, accelerationSpring, isSeparated, collidingWithVrel,
+		                                    velMultiplier, impulseMultiplier, unbiasedErr, biasedErr);
+	}
+	else
+	{
+		const BoolV ccdSeparationCondition = FIsGrtrOrEq(ccdMaxSeparation, penetration);
+		velMultiplier = recipResponse;
+		const FloatV penetrationInvDtScaled = FSel(isSeparated, penetrationInvDt, FMul(penetration, invDtp8));
+		FloatV scaledBias = FMul(velMultiplier, FMax(maxPenBias, penetrationInvDtScaled));
+		scaledBias = FSel(BAnd(ccdSeparationCondition, isGreater2), zero, scaledBias);
+
+		biasedErr = FScaleAdd(targetVelocity, velMultiplier, FNeg(scaledBias));
+		unbiasedErr = FScaleAdd(targetVelocity, velMultiplier, FSel(isGreater2, zero, FNeg(FMax(scaledBias, zero))));
+		impulseMultiplier = FOne();
+	}
+
+	const FloatV deltaF = FMax(FMul(FSub(tVel, FAdd(vrel, FMax(penetrationInvDt, zero))), velMultiplier), zero);
+
+	FStore(biasedErr, &solverContact.biasedErr);
+	FStore(unbiasedErr, &solverContact.unbiasedErr);
+
+	solverContact.raXn_velMultiplierW = V4SetW(Vec4V_From_Vec3V(resp0.angular), velMultiplier);
+	solverContact.rbXn_maxImpulseW = V4SetW(Vec4V_From_Vec3V(V3Neg(resp1.angular)), FLoad(contact.maxImpulse));
+	solverContact.linDeltaVA = deltaV0.linear;
+	solverContact.angDeltaVA = deltaV0.angular;
+	solverContact.linDeltaVB = deltaV1.linear;
+	solverContact.angDeltaVB = deltaV1.angular;
+	FStore(impulseMultiplier, &solverContact.impulseMultiplier);
+
+	return deltaF;
+}
+
+bool createFinalizeSolverContacts(PxSolverContactDesc& contactDesc,
+								  PxsContactManagerOutput& output,
+								 ThreadContext& threadContext,
+								 const PxReal invDtF32,
+								 const PxReal dtF32,
+								 PxReal bounceThresholdF32,
+								 PxReal frictionOffsetThreshold,
+								 PxReal correlationDistance,
+								 PxConstraintAllocator& constraintAllocator,
+								 Cm::SpatialVectorF* Z)
+{
+	PxContactBuffer& buffer = threadContext.mContactBuffer;
+
+	buffer.count = 0;
+
+	// We pull the friction patches out of the cache to remove the dependency on how
+	// the cache is organized. Remember original addrs so we can write them back 
+	// efficiently.
+
+	PxU32 numContacts = 0;
+	{
+		PxReal invMassScale0 = 1.f;
+		PxReal invMassScale1 = 1.f;
+		PxReal invInertiaScale0 = 1.f;
+		PxReal invInertiaScale1 = 1.f;
+
+		bool hasMaxImpulse = false, hasTargetVelocity = false;
+
+		numContacts = extractContacts(buffer, output, hasMaxImpulse, hasTargetVelocity, invMassScale0, invMassScale1,
+			invInertiaScale0, invInertiaScale1, PxMin(contactDesc.data0->maxContactImpulse, contactDesc.data1->maxContactImpulse));
+
+		contactDesc.contacts = buffer.contacts;
+		contactDesc.numContacts = numContacts;
+		contactDesc.disableStrongFriction = contactDesc.disableStrongFriction || hasTargetVelocity;
+		contactDesc.hasMaxImpulse = hasMaxImpulse;
+		contactDesc.invMassScales.linear0 *= invMassScale0;
+		contactDesc.invMassScales.linear1 *= invMassScale1;
+		contactDesc.invMassScales.angular0 *= invInertiaScale0;
+		contactDesc.invMassScales.angular1 *= invInertiaScale1;
+	}
+	
+	CorrelationBuffer& c = threadContext.mCorrelationBuffer;
+
+	return createFinalizeSolverContacts(contactDesc, c, invDtF32, dtF32, bounceThresholdF32, frictionOffsetThreshold,
+		correlationDistance, constraintAllocator, Z);
+}
+  
+PxU32 getContactManagerConstraintDesc(const PxsContactManagerOutput& cmOutput, const PxsContactManager& /*cm*/, PxSolverConstraintDesc& desc)
+{
+	desc.writeBack = cmOutput.contactForces;
+	desc.writeBackFriction = NULL;
+	return cmOutput.nbContacts;// cm.getWorkUnit().axisConstraintCount;
+}
+
+template
+void updateFrictionAnchorCountAndPosition<PxSolverContactDesc>(PxSolverConstraintDesc& desc, PxsContactManagerOutput& output, PxSolverContactDesc& blockDesc);
+template
+void updateFrictionAnchorCountAndPosition<PxTGSSolverContactDesc>(PxSolverConstraintDesc& desc, PxsContactManagerOutput& output, PxTGSSolverContactDesc& blockDesc);
+template <typename SolverContactDesc>
+void updateFrictionAnchorCountAndPosition(PxSolverConstraintDesc& desc, PxsContactManagerOutput& output, SolverContactDesc& blockDesc)
+{
+	desc.writeBackFriction = NULL;
+	if (output.frictionPatches == NULL) return;
+	const PxReal NORMAL_THRESHOLD = 0.999f;
+	PxTransform& bodyFrame0 = blockDesc.bodyFrame0;
+	for (PxU32 frictionIndex = 0; frictionIndex < blockDesc.frictionCount; ++frictionIndex)
+	{
+		FrictionPatch& frictionPatch = reinterpret_cast<FrictionPatch*>(blockDesc.frictionPtr)[frictionIndex];
+		PxVec3 frictionNormal = bodyFrame0.rotate(frictionPatch.body0Normal);
+		for (PxU32 patchIndex = 0; patchIndex < output.nbPatches; ++patchIndex)
+		{
+			PxContactPatch& patch = reinterpret_cast<PxContactPatch*>(output.contactPatches)[patchIndex];
+			if (patch.normal.dot(frictionNormal) > NORMAL_THRESHOLD &&
+				patch.staticFriction == frictionPatch.staticFriction &&
+				patch.dynamicFriction == frictionPatch.dynamicFriction &&
+				patch.restitution == frictionPatch.restitution)
+			{
+				PxFrictionPatch& outPatch = reinterpret_cast<PxFrictionPatch*>(output.frictionPatches)[patchIndex];
+				outPatch.anchorCount = frictionPatch.anchorCount;
+				outPatch.anchorPositions[0] = bodyFrame0.transform(frictionPatch.body0Anchors[0]);
+				outPatch.anchorPositions[1] = bodyFrame0.transform(frictionPatch.body0Anchors[1]);
+				desc.writeBackFriction = outPatch.anchorImpulses;
+				break;
+			}
+		}
+	}
+}
+
+template
+void writeBackContactFriction<SolverContactFrictionStep>(const SolverContactFrictionStep* PX_RESTRICT frictions, PxU32 numFrictionConstr, PxU32 frictionStride, PxVec3* PX_RESTRICT vFrictionWriteback);
+template
+void writeBackContactFriction<SolverContactFriction>(const SolverContactFriction* PX_RESTRICT frictions, PxU32 numFrictionConstr, PxU32 frictionStride, PxVec3* PX_RESTRICT vFrictionWriteback);
+template <typename SolverFriction>
+void writeBackContactFriction(const SolverFriction* PX_RESTRICT frictions, PxU32 numFrictionConstr, PxU32 frictionStride, PxVec3* PX_RESTRICT vFrictionWriteback)
+{
+	if (numFrictionConstr && vFrictionWriteback)
+	{
+		//We will have either 4 or 2 frictions (with friction pairs).
+		//With torsional friction, we may have 3 (a single friction anchor + twist).
+		const PxU32 numFrictionPairs = (numFrictionConstr & 6);
+		const PxU8* ptr = reinterpret_cast<const PxU8*>(frictions);
+
+		for (PxU32 i = 0; i < numFrictionPairs; i += 2)
+		{
+			const SolverFriction& f0 = *reinterpret_cast<const SolverFriction*>(ptr + (i + 0) * frictionStride);
+			const SolverFriction& f1 = *reinterpret_cast<const SolverFriction*>(ptr + (i + 1) * frictionStride);
+			const Vec3V normal0 = f0.getNormal();
+			const Vec3V normal1 = f1.getNormal();
+			const FloatV appliedForce0 = f0.getAppliedForce();
+			const FloatV appliedForce1 = f1.getAppliedForce();
+			Vec3V impulse = V3Add(V3Scale(normal0, appliedForce0), V3Scale(normal1, appliedForce1));
+			PxVec3& frictionImpulse = vFrictionWriteback[i / 2];
+			V3StoreU(impulse, frictionImpulse);
+		}
+	}
+}
+
+}
+
+}
--- a/engine/third_party/physx/source/lowleveldynamics/src/DyContactPrep.h
+++ b/engine/third_party/physx/source/lowleveldynamics/src/DyContactPrep.h
@@ -0,0 +1,204 @@
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+//  * Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//  * Redistributions in binary form must reproduce the above copyright
+//    notice, this list of conditions and the following disclaimer in the
+//    documentation and/or other materials provided with the distribution.
+//  * Neither the name of NVIDIA CORPORATION nor the names of its
+//    contributors may be used to endorse or promote products derived
+//    from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.  
+
+#ifndef DY_CONTACT_PREP_H
+#define DY_CONTACT_PREP_H
+
+#include "CmSpatialVector.h"
+#include "DySolverConstraintDesc.h"
+
+namespace physx
+{
+struct PxcNpWorkUnit;
+class PxsConstraintBlockManager;
+class PxcConstraintBlockStream;
+struct PxsContactManagerOutput;
+class FrictionPatchStreamPair;
+struct PxSolverBody;
+struct PxSolverBodyData;
+struct PxSolverConstraintDesc;
+class PxsContactManager;
+
+namespace Dy
+{
+	class ThreadContext;
+	struct CorrelationBuffer;
+
+#define CREATE_FINALIZE_SOLVER_CONTACT_METHOD_ARGS	\
+	PxSolverContactDesc& contactDesc,				\
+	PxsContactManagerOutput& output,				\
+	ThreadContext& threadContext,					\
+	const PxReal invDtF32,							\
+	const PxReal dtF32,								\
+	PxReal bounceThresholdF32,						\
+	PxReal frictionOffsetThreshold,					\
+	PxReal correlationDistance,						\
+	PxConstraintAllocator& constraintAllocator,		\
+	Cm::SpatialVectorF* Z
+
+#define CREATE_FINALIZE_SOLVER_CONTACT_METHOD_ARGS_4	\
+	PxsContactManagerOutput** outputs,					\
+	ThreadContext& threadContext,						\
+	PxSolverContactDesc* blockDescs,					\
+	const PxReal invDtF32,								\
+	const PxReal dtF32,									\
+	PxReal bounceThresholdF32,							\
+	PxReal	frictionThresholdF32,						\
+	PxReal	correlationDistanceF32,						\
+	PxConstraintAllocator& constraintAllocator				
+	
+/*!
+Method prototype for create finalize solver contact
+*/
+
+typedef	bool (*PxcCreateFinalizeSolverContactMethod)(CREATE_FINALIZE_SOLVER_CONTACT_METHOD_ARGS);
+
+typedef	SolverConstraintPrepState::Enum (*PxcCreateFinalizeSolverContactMethod4)(CREATE_FINALIZE_SOLVER_CONTACT_METHOD_ARGS_4);
+
+bool createFinalizeSolverContacts(	PxSolverContactDesc& contactDesc,
+									PxsContactManagerOutput& output,
+									ThreadContext& threadContext,
+									const PxReal invDtF32,
+									const PxReal dtF32,
+									PxReal bounceThresholdF32,
+									PxReal frictionOffsetThreshold,
+									PxReal correlationDistance,
+									PxConstraintAllocator& constraintAllocator,
+									Cm::SpatialVectorF* Z);
+
+bool createFinalizeSolverContacts(	PxSolverContactDesc& contactDesc,
+									CorrelationBuffer& c,
+									const PxReal invDtF32,
+									const PxReal dtF32,
+									PxReal bounceThresholdF32,
+									PxReal frictionOffsetThreshold,
+									PxReal correlationDistance,
+									PxConstraintAllocator& constraintAllocator,
+									Cm::SpatialVectorF* Z);
+
+SolverConstraintPrepState::Enum createFinalizeSolverContacts4(	PxsContactManagerOutput** outputs,
+																 ThreadContext& threadContext,
+																 PxSolverContactDesc* blockDescs,
+																 const PxReal invDtF32,
+																 const PxReal dtF32,
+																 PxReal bounceThresholdF32,
+																 PxReal frictionOffsetThreshold,
+																 PxReal correlationDistance,
+																 PxConstraintAllocator& constraintAllocator);
+
+SolverConstraintPrepState::Enum createFinalizeSolverContacts4(	Dy::CorrelationBuffer& c,
+																PxSolverContactDesc* blockDescs,
+																const PxReal invDtF32,
+																const PxReal dtF32,
+																PxReal bounceThresholdF32,
+																PxReal	frictionOffsetThreshold,
+																PxReal correlationDistance,
+																PxConstraintAllocator& constraintAllocator);
+
+bool createFinalizeSolverContactsCoulomb1D(PxSolverContactDesc& contactDesc,
+											 PxsContactManagerOutput& output,
+											 ThreadContext& threadContext,
+											 const PxReal invDtF32,
+											 const PxReal dtF32,
+											 PxReal bounceThresholdF32,
+											 PxReal frictionOffsetThreshold,
+											 PxReal correlationDistance,
+											 PxConstraintAllocator& constraintAllocator,
+											 Cm::SpatialVectorF* Z);
+
+bool createFinalizeSolverContactsCoulomb2D(PxSolverContactDesc& contactDesc,
+											PxsContactManagerOutput& output,
+											ThreadContext& threadContext,
+											const PxReal invDtF32,
+											const PxReal dtF32,
+											PxReal bounceThresholdF32,
+											PxReal frictionOffsetThreshold,
+											PxReal correlationDistance,
+											PxConstraintAllocator& constraintAllocator,
+											Cm::SpatialVectorF* Z);
+
+SolverConstraintPrepState::Enum createFinalizeSolverContacts4Coulomb1D(	PxsContactManagerOutput** outputs,
+																		ThreadContext& threadContext,
+																		 PxSolverContactDesc* blockDescs,
+																		 const PxReal invDtF32,
+																		 const PxReal dtF32,
+																		 PxReal bounceThresholdF32,
+																		 PxReal frictionOffsetThreshold,
+																		 PxReal correlationDistance,
+																		 PxConstraintAllocator& constraintAllocator);
+
+SolverConstraintPrepState::Enum createFinalizeSolverContacts4Coulomb2D(PxsContactManagerOutput** outputs,
+																		ThreadContext& threadContext,
+																		PxSolverContactDesc* blockDescs,
+																		const PxReal invDtF32,
+																		const PxReal dtF32,
+																		PxReal bounceThresholdF32,
+																		PxReal frictionOffsetThreshold,
+																		PxReal correlationDistance,
+																		PxConstraintAllocator& constraintAllocator);
+
+PxU32 getContactManagerConstraintDesc(const PxsContactManagerOutput& cmOutput, const PxsContactManager& cm, PxSolverConstraintDesc& desc);
+
+template <typename SolverContactDesc>
+void updateFrictionAnchorCountAndPosition(PxSolverConstraintDesc& desc, PxsContactManagerOutput& output, SolverContactDesc& blockDesc);
+
+template <typename SolverFriction>
+void writeBackContactFriction(const SolverFriction* PX_RESTRICT frictions, PxU32 numFrictionConstr, PxU32 frictionStride, PxVec3* PX_RESTRICT vFrictionWriteback);
+
+class BlockAllocator : public PxConstraintAllocator
+{
+	PxsConstraintBlockManager& mConstraintBlockManager;
+	PxcConstraintBlockStream& mConstraintBlockStream;
+	FrictionPatchStreamPair& mFrictionPatchStreamPair;
+	PxU32& mTotalConstraintByteSize;
+public:
+
+	BlockAllocator(PxsConstraintBlockManager& constraintBlockManager, PxcConstraintBlockStream& constraintBlockStream, FrictionPatchStreamPair& frictionPatchStreamPair,
+		PxU32& totalConstraintByteSize) :
+		mConstraintBlockManager(constraintBlockManager), mConstraintBlockStream(constraintBlockStream), mFrictionPatchStreamPair(frictionPatchStreamPair),
+		mTotalConstraintByteSize(totalConstraintByteSize)
+	{
+	}
+
+	virtual PxU8* reserveConstraintData(const PxU32 size);
+
+	virtual PxU8* reserveFrictionData(const PxU32 size);
+
+	virtual PxU8* findInputPatches(PxU8* frictionCookie)
+	{
+		return frictionCookie;
+	}
+
+	PX_NOCOPY(BlockAllocator)
+};
+
+}
+
+}
+
+#endif
--- a/engine/third_party/physx/source/lowleveldynamics/src/DyContactPrep4.cpp
+++ b/engine/third_party/physx/source/lowleveldynamics/src/DyContactPrep4.cpp
--- a/engine/third_party/physx/source/lowleveldynamics/src/DyContactPrepShared.h
+++ b/engine/third_party/physx/source/lowleveldynamics/src/DyContactPrepShared.h
@@ -0,0 +1,396 @@
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+//  * Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//  * Redistributions in binary form must reproduce the above copyright
+//    notice, this list of conditions and the following disclaimer in the
+//    documentation and/or other materials provided with the distribution.
+//  * Neither the name of NVIDIA CORPORATION nor the names of its
+//    contributors may be used to endorse or promote products derived
+//    from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.  
+
+#ifndef DY_CONTACT_PREP_SHARED_H
+#define DY_CONTACT_PREP_SHARED_H
+     
+#include "foundation/PxPreprocessor.h"
+#include "foundation/PxVecMath.h"
+#include "PxMaterial.h"
+#include "DyContactPrep.h"
+#include "DyCorrelationBuffer.h"
+#include "DyArticulationContactPrep.h"
+#include "PxsContactManager.h"
+#include "PxsContactManagerState.h"
+#include "PxcNpContactPrepShared.h"
+#include "DySolverContact4.h"
+
+namespace physx
+{
+namespace Dy
+{
+template<class PxSolverContactDescT>
+PX_FORCE_INLINE Sc::ShapeInteraction* getInteraction(const PxSolverContactDescT& desc)
+{
+	return reinterpret_cast<Sc::ShapeInteraction*>(desc.shapeInteraction);
+}
+
+PX_FORCE_INLINE bool pointsAreClose(const PxTransform& body1ToBody0,
+									const PxVec3& localAnchor0, const PxVec3& localAnchor1,
+									const PxVec3& axis, float correlDist)
+{
+	const PxVec3 body0PatchPoint1 = body1ToBody0.transform(localAnchor1);
+
+	return PxAbs((localAnchor0 - body0PatchPoint1).dot(axis))<correlDist;
+}
+
+PX_FORCE_INLINE bool isSeparated(const FrictionPatch& patch, const PxTransform& body1ToBody0, const PxReal correlationDistance)
+{
+	PX_ASSERT(patch.anchorCount <= 2);
+	for(PxU32 a = 0; a < patch.anchorCount; ++a)
+	{
+		if(!pointsAreClose(body1ToBody0, patch.body0Anchors[a], patch.body1Anchors[a], patch.body0Normal, correlationDistance))
+			return true;
+	}
+	return false;
+}
+
+inline bool getFrictionPatches(CorrelationBuffer& c,
+						const PxU8* frictionCookie,
+						PxU32 frictionPatchCount,
+						const PxTransform& bodyFrame0,
+						const PxTransform& bodyFrame1,
+						PxReal correlationDistance)
+{
+	if(frictionCookie == NULL || frictionPatchCount == 0)
+		return true;
+
+	//KS - this is now DMA'd inside the shader so we don't need to immediate DMA it here
+	const FrictionPatch* patches = reinterpret_cast<const FrictionPatch*>(frictionCookie);
+
+	//Try working out relative transforms! TODO - can we compute this lazily for the first friction patch
+	bool evaluated = false;
+	PxTransform body1ToBody0;
+
+	while(frictionPatchCount--)
+	{
+		PxPrefetchLine(patches,128);
+		const FrictionPatch& patch = *patches++;
+		PX_ASSERT (patch.broken == 0 || patch.broken == 1);
+		if(!patch.broken)
+		{
+			// if the eDISABLE_STRONG_FRICTION flag is there we need to blow away the previous frame's friction correlation, so
+			// that we can associate each friction anchor with a target velocity. So we lose strong friction.
+			if(patch.anchorCount != 0 && !(patch.materialFlags & PxMaterialFlag::eDISABLE_STRONG_FRICTION))
+			{
+				PX_ASSERT(patch.anchorCount <= 2);
+			
+				if(!evaluated)
+				{
+					body1ToBody0 = bodyFrame0.transformInv(bodyFrame1);
+					evaluated = true;
+				}
+
+				if(patch.body0Normal.dot(body1ToBody0.rotate(patch.body1Normal)) > PXC_SAME_NORMAL)
+				{
+					if(!isSeparated(patch, body1ToBody0, correlationDistance))
+					{
+						if(c.frictionPatchCount == CorrelationBuffer::MAX_FRICTION_PATCHES)
+							return false;
+						{
+							c.contactID[c.frictionPatchCount][0] = 0xffff;
+							c.contactID[c.frictionPatchCount][1] = 0xffff;
+							//Rotate the contact normal into world space
+							c.frictionPatchWorldNormal[c.frictionPatchCount] = bodyFrame0.rotate(patch.body0Normal);
+							c.frictionPatchContactCounts[c.frictionPatchCount] = 0;
+							c.patchBounds[c.frictionPatchCount].setEmpty();
+							c.correlationListHeads[c.frictionPatchCount] = CorrelationBuffer::LIST_END;
+							PxMemCopy(&c.frictionPatches[c.frictionPatchCount++], &patch, sizeof(FrictionPatch));
+						}
+					}
+				}
+			}
+		}
+	}
+	return true;
+}
+
+PX_FORCE_INLINE PxU32 extractContacts(PxContactBuffer& buffer, const PxsContactManagerOutput& npOutput, bool& hasMaxImpulse, bool& hasTargetVelocity,
+							 PxReal& invMassScale0, PxReal& invMassScale1, PxReal& invInertiaScale0, PxReal& invInertiaScale1, PxReal defaultMaxImpulse)
+{
+	PxContactStreamIterator iter(npOutput.contactPatches, npOutput.contactPoints, npOutput.getInternalFaceIndice(), npOutput.nbPatches, npOutput.nbContacts);
+
+	PxU32 numContacts = buffer.count, origContactCount = buffer.count;
+	if(!iter.forceNoResponse)
+	{
+		invMassScale0 = iter.getInvMassScale0();
+		invMassScale1 = iter.getInvMassScale1();
+		invInertiaScale0 = iter.getInvInertiaScale0();
+		invInertiaScale1 = iter.getInvInertiaScale1();
+		hasMaxImpulse = (iter.patch->internalFlags & PxContactPatch::eHAS_MAX_IMPULSE) != 0;
+		hasTargetVelocity = (iter.patch->internalFlags & PxContactPatch::eHAS_TARGET_VELOCITY) != 0;
+
+		while(iter.hasNextPatch())
+		{
+			iter.nextPatch();
+			while(iter.hasNextContact())
+			{
+				iter.nextContact();
+				PxPrefetchLine(iter.contact, 128);
+				PxPrefetchLine(&buffer.contacts[numContacts], 128);
+				PxReal maxImpulse = hasMaxImpulse ? iter.getMaxImpulse() : defaultMaxImpulse;
+				if(maxImpulse != 0.f)
+				{
+					PX_ASSERT(numContacts < PxContactBuffer::MAX_CONTACTS);
+					buffer.contacts[numContacts].normal = iter.getContactNormal();
+					PX_ASSERT(PxAbs(buffer.contacts[numContacts].normal.magnitude() - 1) < 1e-3f);
+					buffer.contacts[numContacts].point = iter.getContactPoint();
+					buffer.contacts[numContacts].separation = iter.getSeparation();
+					//KS - we use the face indices to cache the material indices and flags - avoids bloating the PxContact structure
+					PX_ASSERT(iter.getMaterialFlags() <= PX_MAX_U8);
+					buffer.contacts[numContacts].materialFlags = PxU8(iter.getMaterialFlags());
+					buffer.contacts[numContacts].maxImpulse = maxImpulse;
+					buffer.contacts[numContacts].staticFriction = iter.getStaticFriction();
+					buffer.contacts[numContacts].dynamicFriction = iter.getDynamicFriction();
+					buffer.contacts[numContacts].restitution = iter.getRestitution();
+					buffer.contacts[numContacts].damping = iter.getDamping();
+					const PxVec3& targetVel = iter.getTargetVel();
+					buffer.contacts[numContacts].targetVel = targetVel;
+					++numContacts;
+				}
+			}
+		}
+	}
+	const PxU32 contactCount = numContacts - origContactCount;
+	buffer.count = numContacts;
+	return contactCount;
+}
+
+struct CorrelationListIterator
+{
+	CorrelationBuffer& buffer;
+	PxU32 currPatch;
+	PxU32 currContact;
+
+	CorrelationListIterator(CorrelationBuffer& correlationBuffer, PxU32 startPatch) : buffer(correlationBuffer)
+	{
+		//We need to force us to advance the correlation buffer to the first available contact (if one exists)
+		PxU32 newPatch = startPatch, newContact = 0;
+
+		while(newPatch != CorrelationBuffer::LIST_END && newContact == buffer.contactPatches[newPatch].count)
+		{
+			newPatch = buffer.contactPatches[newPatch].next;
+			newContact = 0;
+		}
+
+		currPatch = newPatch;
+		currContact = newContact;
+	}
+
+	//Returns true if it has another contact pre-loaded. Returns false otherwise
+	PX_FORCE_INLINE bool hasNextContact()	const
+	{
+		return (currPatch != CorrelationBuffer::LIST_END && currContact < buffer.contactPatches[currPatch].count);
+	}
+
+	inline void nextContact(PxU32& patch, PxU32& contact)
+	{
+		PX_ASSERT(currPatch != CorrelationBuffer::LIST_END);
+		PX_ASSERT(currContact < buffer.contactPatches[currPatch].count);
+
+		patch = currPatch;
+		contact = currContact;
+		PxU32 newPatch = currPatch, newContact = currContact + 1;
+
+		while(newPatch != CorrelationBuffer::LIST_END && newContact == buffer.contactPatches[newPatch].count)
+		{
+			newPatch = buffer.contactPatches[newPatch].next;
+			newContact = 0;
+		}
+
+		currPatch = newPatch;
+		currContact = newContact;
+	}
+
+private:
+	CorrelationListIterator& operator=(const CorrelationListIterator&);
+
+};
+
+namespace {
+
+// Decides whether or not the damper should be turned on. We don't want damping if the contact
+// is not expected to be closed this step because the damper can produce repulsive forces
+// even before the contact is closed.
+PX_FORCE_INLINE FloatV computeCompliantDamping(const BoolVArg isSeparated, const BoolVArg collidingWithVrel,
+                                               const FloatVArg damping)
+{
+	const FloatV zero = FZero();
+	const FloatV dampingIfEnabled = FSel(BAndNot(isSeparated, collidingWithVrel), zero, damping);
+	return dampingIfEnabled;
+}
+
+PX_FORCE_INLINE void computeCompliantContactCoefficients(
+    const FloatVArg dt, const FloatVArg restitution, const FloatVArg damping, const FloatVArg recipResponse,
+    const FloatVArg unitResponse, const FloatVArg penetration, const FloatVArg targetVelocity,
+    const BoolVArg accelerationSpring, const BoolVArg isSeparated, const BoolVArg collidingWithVrel,
+    FloatVArg velMultiplier, FloatVArg impulseMultiplier, FloatVArg unbiasedErr, FloatVArg biasedErr)
+{
+	// negative restitution interpreted as spring stiffness for compliant contact
+	const FloatV nrdt = FMul(dt, restitution); // -dt*stiffness
+
+	const FloatV one = FOne();
+	const FloatV massIfAccelElseOne = FSel(accelerationSpring, recipResponse, one);
+	const FloatV dampingIfEnabled = computeCompliantDamping(isSeparated, collidingWithVrel, damping);
+
+	const FloatV a = FMul(dt, FSub(dampingIfEnabled, nrdt));                  // a = dt * (damping + dt*stiffness)
+	const FloatV b = FMul(FNeg(FMul(nrdt, penetration)), massIfAccelElseOne); // b = dt * stiffness * penetration
+	const FloatV x = FRecip(FScaleAdd(a, FSel(accelerationSpring, one, unitResponse), one));
+	const FloatV scaledBias = FMul(x, b);
+	// FloatV scaledBias = FSel(isSeparated, FNeg(invStepDt), FDiv(FMul(nrdt, FMul(x, unitResponse)), velMultiplier));
+
+	velMultiplier = FMul(FMul(x, a), massIfAccelElseOne);
+	impulseMultiplier = FSub(one, x);
+	unbiasedErr = biasedErr = FScaleAdd(targetVelocity, velMultiplier, FNeg(scaledBias));
+}
+
+PX_FORCE_INLINE void computeCompliantContactCoefficientsTGS(const FloatVArg dt, const FloatVArg restitution,
+                                                            const FloatVArg damping, const FloatVArg recipResponse,
+                                                            const FloatVArg unitResponse,
+                                                            const BoolVArg accelerationSpring,
+                                                            const BoolVArg isSeparated, const BoolVArg collidingWithVrel,
+                                                            FloatVArg velMultiplier, FloatVArg biasCoeff)
+{
+	const FloatV nrdt = FMul(dt, restitution);      // -dt * stiffness
+	const FloatV dampingIfEnabled = computeCompliantDamping(isSeparated, collidingWithVrel, damping);
+	const FloatV a = FMul(dt, FSub(dampingIfEnabled, nrdt)); // a = dt * (damping + dt * stiffness)
+
+	const FloatV one = FOne();
+	const FloatV massIfAccelElseOne = FSel(accelerationSpring, recipResponse, one);
+	const FloatV oneIfAccelElseR = FSel(accelerationSpring, one, unitResponse);
+
+	const FloatV x = FRecip(FScaleAdd(a, oneIfAccelElseR, one));
+
+	velMultiplier = FMul(FMul(x, a), massIfAccelElseOne);
+	// biasCoeff = FSel(isSeparated, FNeg(invStepDt), FDiv(FMul(nrdt, FMul(x, unitResponse)), velMultiplier));
+	// biasCoeff includes the unit response s.t. velDeltaFromPosError = separation*biasCoeff
+	biasCoeff = FMul(nrdt, FMul(x, oneIfAccelElseR));
+}
+
+} // anonymous namespace
+
+// PGS rigid-rigid or rigid-static normal contact prepping code
+PX_FORCE_INLINE void constructContactConstraint(const Mat33V& invSqrtInertia0, const Mat33V& invSqrtInertia1, const FloatVArg invMassNorLenSq0, 
+	const FloatVArg invMassNorLenSq1, const FloatVArg angD0, const FloatVArg angD1, const Vec3VArg bodyFrame0p, const Vec3VArg bodyFrame1p,
+	const Vec3VArg normal, const FloatVArg norVel, const VecCrossV& norCross, const Vec3VArg angVel0, const Vec3VArg angVel1,
+	const FloatVArg invDt, const FloatVArg invDtp8, const FloatVArg dt, const FloatVArg restDistance, const FloatVArg maxPenBias, const FloatVArg restitution,
+	const FloatVArg bounceThreshold, const PxContactPoint& contact, SolverContactPoint& solverContact,
+	const FloatVArg ccdMaxSeparation, const Vec3VArg solverOffsetSlop, const FloatVArg damping, const BoolVArg accelerationSpring)
+{
+	const FloatV zero = FZero();
+	const Vec3V point = V3LoadA(contact.point);
+	const FloatV separation = FLoad(contact.separation);
+
+	const FloatV cTargetVel = V3Dot(normal, V3LoadA(contact.targetVel));
+
+	const Vec3V ra = V3Sub(point, bodyFrame0p);
+	const Vec3V rb = V3Sub(point, bodyFrame1p);
+
+	/*ra = V3Sel(V3IsGrtr(solverOffsetSlop, V3Abs(ra)), V3Zero(), ra);
+	rb = V3Sel(V3IsGrtr(solverOffsetSlop, V3Abs(rb)), V3Zero(), rb);*/
+
+	Vec3V raXn = V3Cross(ra, norCross);
+	Vec3V rbXn = V3Cross(rb, norCross);
+
+	FloatV vRelAng = FSub(V3Dot(raXn, angVel0), V3Dot(rbXn, angVel1));
+
+	const Vec3V slop = V3Scale(solverOffsetSlop, FMax(FSel(FIsEq(norVel, zero), FMax(), FDiv(vRelAng, norVel)), FOne()));
+
+	raXn = V3Sel(V3IsGrtr(slop, V3Abs(raXn)), V3Zero(), raXn);
+	rbXn = V3Sel(V3IsGrtr(slop, V3Abs(rbXn)), V3Zero(), rbXn);
+
+	vRelAng = FSub(V3Dot(raXn, angVel0), V3Dot(rbXn, angVel1));
+
+	const FloatV vrel = FAdd(norVel, vRelAng);
+
+	const Vec3V raXnSqrtInertia = M33MulV3(invSqrtInertia0, raXn);
+	const Vec3V rbXnSqrtInertia = M33MulV3(invSqrtInertia1, rbXn);				
+
+	const FloatV resp0 = FAdd(invMassNorLenSq0, FMul(V3Dot(raXnSqrtInertia, raXnSqrtInertia), angD0));
+	const FloatV resp1 = FSub(FMul(V3Dot(rbXnSqrtInertia, rbXnSqrtInertia), angD1), invMassNorLenSq1);
+
+	const FloatV unitResponse = FAdd(resp0, resp1);
+
+	const FloatV penetration = FSub(separation, restDistance);
+	const FloatV penetrationInvDt = FMul(penetration, invDt);
+	const BoolV isSeparated = FIsGrtrOrEq(penetration, zero);
+
+	const BoolV collidingWithVrel = FIsGrtr(FNeg(vrel), penetrationInvDt); // true if pen + dt*vrel < 0
+	const BoolV isGreater2 = BAnd(BAnd(FIsGrtr(restitution, zero), FIsGrtr(bounceThreshold, vrel)), collidingWithVrel);
+
+	FloatV targetVelocity = FAdd(cTargetVel, FSel(isGreater2, FMul(FNeg(vrel), restitution), zero));
+
+	//Note - we add on the initial target velocity
+	targetVelocity = FSub(targetVelocity, vrel);
+
+	const FloatV recipResponse = FSel(FIsGrtr(unitResponse, zero), FRecip(unitResponse), zero);
+
+	FloatV biasedErr, unbiasedErr;
+	FloatV velMultiplier, impulseMultiplier;
+
+	if (FAllGrtr(zero, restitution))
+	{
+		computeCompliantContactCoefficients(dt, restitution, damping, recipResponse, unitResponse, penetration,
+		                                    targetVelocity, accelerationSpring, isSeparated, collidingWithVrel,
+		                                    velMultiplier, impulseMultiplier, unbiasedErr, biasedErr);
+	}
+	else
+	{
+		velMultiplier = recipResponse;
+
+		// Divide bias term by dt and additionally scale it down if it's in penetration
+		// Do not scale it if it's not in penetration, otherwise we falsely act on contacts that are still
+		// sufficiently far away.
+		const FloatV penetrationInvDtScaled = FSel(isSeparated, penetrationInvDt, FMul(penetration, invDtp8));
+
+		FloatV scaledBias = FMul(velMultiplier, FMax(maxPenBias, penetrationInvDtScaled));
+
+		const BoolV ccdSeparationCondition = FIsGrtrOrEq(ccdMaxSeparation, penetration);
+
+		scaledBias = FSel(BAnd(ccdSeparationCondition, isGreater2), zero, scaledBias);
+
+		impulseMultiplier = FLoad(1.0f);
+
+		biasedErr = FScaleAdd(targetVelocity, velMultiplier, FNeg(scaledBias));
+		unbiasedErr = FScaleAdd(targetVelocity, velMultiplier, FSel(isGreater2, zero, FNeg(FMax(scaledBias, zero))));
+	}
+
+	//const FloatV unbiasedErr = FScaleAdd(targetVelocity, velMultiplier, FNeg(FMax(scaledBias, zero)));
+
+	FStore(biasedErr, &solverContact.biasedErr);
+	FStore(unbiasedErr, &solverContact.unbiasedErr);
+	FStore(impulseMultiplier, &solverContact.impulseMultiplier);
+
+	solverContact.raXn_velMultiplierW = V4SetW(Vec4V_From_Vec3V(raXnSqrtInertia), velMultiplier);
+	solverContact.rbXn_maxImpulseW = V4SetW(Vec4V_From_Vec3V(rbXnSqrtInertia), FLoad(contact.maxImpulse));
+}
+
+}
+}
+
+#endif
--- a/engine/third_party/physx/source/lowleveldynamics/src/DyContactReduction.h
+++ b/engine/third_party/physx/source/lowleveldynamics/src/DyContactReduction.h
@@ -0,0 +1,408 @@
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+//  * Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//  * Redistributions in binary form must reproduce the above copyright
+//    notice, this list of conditions and the following disclaimer in the
+//    documentation and/or other materials provided with the distribution.
+//  * Neither the name of NVIDIA CORPORATION nor the names of its
+//    contributors may be used to endorse or promote products derived
+//    from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.  
+
+#ifndef DY_CONTACT_REDUCTION_H
+#define DY_CONTACT_REDUCTION_H
+
+#include "foundation/PxMemory.h"
+#include "foundation/PxSort.h"
+#include "PxsMaterialManager.h"
+#include "geomutils/PxContactPoint.h"
+#include "geomutils/PxContactBuffer.h"
+
+namespace physx
+{
+namespace Dy
+{
+
+//KS - might be OK with 4 but 5 guarantees the deepest + 4 contacts that contribute to largest surface area
+// #define CONTACT_REDUCTION_MAX_CONTACTS 6 // Replaced by template argument MaxContactsPerPatch
+#define CONTACT_REDUCTION_MAX_PATCHES 32
+#define PXS_NORMAL_TOLERANCE 0.995f
+#define PXS_SEPARATION_TOLERANCE 0.001f
+
+
+	//A patch contains a normal, pair of material indices and a list of indices. These indices are 
+	//used to index into the PxContact array that's passed by the user
+	template <PxU32 MaxContactsPerPatch>
+	struct ReducedContactPatch
+	{
+		PxU32 numContactPoints;
+		PxU32 contactPoints[MaxContactsPerPatch];
+	};
+
+	struct ContactPatch
+	{	
+		PxVec3 rootNormal;
+		ContactPatch* mNextPatch;
+		PxReal maxPenetration;
+		PxU16 startIndex;
+		PxU16 stride;
+		PxU16 rootIndex;
+		PxU16 index;
+	};
+
+	struct SortBoundsPredicateManifold
+	{
+		bool operator()(const ContactPatch* idx1, const ContactPatch* idx2) const
+		{
+			return idx1->maxPenetration < idx2->maxPenetration;
+		}
+	};
+
+
+
+	template <PxU32 MaxPatches, PxU32 MaxContactsPerPatch>
+	class ContactReduction
+	{
+	public:
+		ReducedContactPatch<MaxContactsPerPatch> mPatches[MaxPatches];
+		PxU32 mNumPatches;
+		ContactPatch mIntermediatePatches[CONTACT_REDUCTION_MAX_PATCHES];
+		ContactPatch* mIntermediatePatchesPtrs[CONTACT_REDUCTION_MAX_PATCHES];
+		PxU32 mNumIntermediatePatches;
+		PxContactPoint* PX_RESTRICT mOriginalContacts;
+		PxsMaterialInfo* PX_RESTRICT mMaterialInfo;
+		PxU32 mNumOriginalContacts;
+
+		ContactReduction(PxContactPoint* PX_RESTRICT originalContacts, PxsMaterialInfo* PX_RESTRICT materialInfo, PxU32 numContacts) : 
+		mNumPatches(0), mNumIntermediatePatches(0),	mOriginalContacts(originalContacts), mMaterialInfo(materialInfo), mNumOriginalContacts(numContacts)
+		{
+		}
+
+		void reduceContacts()
+		{
+			//First pass, break up into contact patches, storing the start and stride of the patches
+			//We will need to have contact patches and then coallesce them
+			mIntermediatePatches[0].rootNormal = mOriginalContacts[0].normal;
+			mIntermediatePatches[0].mNextPatch = NULL;
+			mIntermediatePatches[0].startIndex = 0;
+			mIntermediatePatches[0].rootIndex = 0;
+			mIntermediatePatches[0].maxPenetration = mOriginalContacts[0].separation;
+			mIntermediatePatches[0].index = 0;
+			PxU16 numPatches = 1;
+			//PxU32 startIndex = 0;
+			PxU16 m = 1;
+			for(; m < mNumOriginalContacts; ++m)
+			{
+				PxI32 index = -1;
+				for(PxU32 b = numPatches; b > 0; --b)
+				{
+					ContactPatch& patch = mIntermediatePatches[b-1];
+					if(mMaterialInfo[patch.startIndex].mMaterialIndex0 == mMaterialInfo[m].mMaterialIndex0 && mMaterialInfo[patch.startIndex].mMaterialIndex1 == mMaterialInfo[m].mMaterialIndex1 && 
+						patch.rootNormal.dot(mOriginalContacts[m].normal) >= PXS_NORMAL_TOLERANCE)
+					{
+						index = PxI32(b-1);
+						break;
+					}
+				}
+
+				if(index != numPatches - 1)
+				{
+					mIntermediatePatches[numPatches-1].stride = PxU16(m - mIntermediatePatches[numPatches - 1].startIndex);
+					//Create a new patch...
+					if(numPatches == CONTACT_REDUCTION_MAX_PATCHES)
+					{
+						break;
+					}
+					mIntermediatePatches[numPatches].startIndex = m;
+					mIntermediatePatches[numPatches].mNextPatch = NULL;
+					if(index == -1)
+					{
+						mIntermediatePatches[numPatches].rootIndex = numPatches;
+						mIntermediatePatches[numPatches].rootNormal = mOriginalContacts[m].normal;
+						mIntermediatePatches[numPatches].maxPenetration = mOriginalContacts[m].separation;
+						mIntermediatePatches[numPatches].index = numPatches;
+					}
+					else
+					{
+						//Find last element in the link
+						PxU16 rootIndex = mIntermediatePatches[index].rootIndex;
+						mIntermediatePatches[index].mNextPatch = &mIntermediatePatches[numPatches];
+						mIntermediatePatches[numPatches].rootNormal = mIntermediatePatches[index].rootNormal;
+						mIntermediatePatches[rootIndex].maxPenetration = mIntermediatePatches[numPatches].maxPenetration = PxMin(mIntermediatePatches[rootIndex].maxPenetration, mOriginalContacts[m].separation);
+						mIntermediatePatches[numPatches].rootIndex = rootIndex;
+						mIntermediatePatches[numPatches].index = numPatches;
+					}
+					++numPatches;
+				}
+			}
+			mIntermediatePatches[numPatches-1].stride = PxU16(m - mIntermediatePatches[numPatches-1].startIndex);
+
+			//OK, we have a list of contact patches so that we can start contact reduction per-patch
+
+			//OK, now we can go and reduce the contacts on a per-patch basis...
+
+			for(PxU32 a = 0; a < numPatches; ++a)
+			{
+				mIntermediatePatchesPtrs[a] = &mIntermediatePatches[a];
+			}
+
+
+			SortBoundsPredicateManifold predicate;
+			PxSort(mIntermediatePatchesPtrs, numPatches, predicate);
+
+			PxU32 numReducedPatches = 0;
+			for(PxU32 a = 0; a < numPatches; ++a)
+			{
+				if(mIntermediatePatchesPtrs[a]->rootIndex == mIntermediatePatchesPtrs[a]->index)
+				{
+					//Reduce this patch...
+					if(numReducedPatches == MaxPatches)
+						break;
+
+					ReducedContactPatch<MaxContactsPerPatch>& reducedPatch = mPatches[numReducedPatches++];
+					//OK, now we need to work out if we have to reduce patches...
+					PxU32 contactCount = 0;
+					{
+						ContactPatch* tmpPatch = mIntermediatePatchesPtrs[a];
+
+						while(tmpPatch)
+						{
+							contactCount += tmpPatch->stride;
+							tmpPatch = tmpPatch->mNextPatch;
+						}
+					}
+
+					if(contactCount <= MaxContactsPerPatch)
+					{
+						//Just add the contacts...
+						ContactPatch* tmpPatch = mIntermediatePatchesPtrs[a];
+
+						PxU32 ind = 0;
+						while(tmpPatch)
+						{
+							for(PxU32 b = 0; b < tmpPatch->stride; ++b)
+							{
+								reducedPatch.contactPoints[ind++] = tmpPatch->startIndex + b;
+							}
+							tmpPatch = tmpPatch->mNextPatch;
+						}
+						reducedPatch.numContactPoints = contactCount;
+					}
+					else
+					{
+						//Iterate through and find the most extreme point
+						
+
+						PxU32 ind = 0;
+
+						{
+							PxReal dist = 0.f;
+							ContactPatch* tmpPatch = mIntermediatePatchesPtrs[a];
+							while(tmpPatch)
+							{
+								for(PxU32 b = 0; b < tmpPatch->stride; ++b)
+								{
+									PxReal magSq = mOriginalContacts[tmpPatch->startIndex + b].point.magnitudeSquared();
+									if(dist < magSq)
+									{
+										ind = tmpPatch->startIndex + b;
+										dist = magSq;
+									}
+								}
+								tmpPatch = tmpPatch->mNextPatch;
+							}
+						}	
+						reducedPatch.contactPoints[0] = ind;
+						const PxVec3 p0 = mOriginalContacts[ind].point;
+
+						//Now find the point farthest from this point...						
+						{
+							PxReal maxDist = 0.f;
+							ContactPatch* tmpPatch = mIntermediatePatchesPtrs[a];
+							while(tmpPatch)
+							{
+								for(PxU32 b = 0; b < tmpPatch->stride; ++b)
+								{
+									PxReal magSq = (p0 - mOriginalContacts[tmpPatch->startIndex + b].point).magnitudeSquared();
+									if(magSq > maxDist)
+									{
+										ind = tmpPatch->startIndex + b;
+										maxDist = magSq;
+									}
+								}
+								tmpPatch = tmpPatch->mNextPatch;
+							}
+						}
+						reducedPatch.contactPoints[1] = ind;
+						const PxVec3 p1 = mOriginalContacts[ind].point;
+
+						//Now find the point farthest from the segment
+
+						PxVec3 n = (p0 - p1).cross(mIntermediatePatchesPtrs[a]->rootNormal);
+
+						//PxReal tVal = 0.f;
+						{
+							PxReal maxDist = 0.f;
+							//PxReal tmpTVal;
+							
+							ContactPatch* tmpPatch = mIntermediatePatchesPtrs[a];
+							while(tmpPatch)
+							{
+								for(PxU32 b = 0; b < tmpPatch->stride; ++b)
+								{
+									
+									//PxReal magSq = tmpDistancePointSegmentSquared(p0, p1, mOriginalContacts[tmpPatch->startIndex + b].point, tmpTVal);
+									PxReal magSq = (mOriginalContacts[tmpPatch->startIndex + b].point - p0).dot(n);
+									if(magSq > maxDist)
+									{
+										ind = tmpPatch->startIndex + b;
+										//tVal = tmpTVal;
+										maxDist = magSq;
+									}
+								}
+								tmpPatch = tmpPatch->mNextPatch;
+							}
+						}
+						reducedPatch.contactPoints[2] = ind;
+
+						//const PxVec3 closest = (p0 + (p1 - p0) * tVal);
+
+						const PxVec3 dir = -n;//closest - p3;
+
+						{
+							PxReal maxDist = 0.f;
+							//PxReal tVal = 0.f;
+							ContactPatch* tmpPatch = mIntermediatePatchesPtrs[a];
+							while(tmpPatch)
+							{
+								for(PxU32 b = 0; b < tmpPatch->stride; ++b)
+								{
+									PxReal magSq = (mOriginalContacts[tmpPatch->startIndex + b].point - p0).dot(dir);
+									if(magSq > maxDist)
+									{
+										ind = tmpPatch->startIndex + b;
+										maxDist = magSq;
+									}
+								}
+								tmpPatch = tmpPatch->mNextPatch;
+							}
+						}
+						reducedPatch.contactPoints[3] = ind;
+
+						//Now, we iterate through all the points, and cluster the points. From this, we establish the deepest point that's within a 
+						//tolerance of this point and keep that point
+
+						PxReal separation[MaxContactsPerPatch];
+						PxU32 deepestInd[MaxContactsPerPatch];
+						for(PxU32 i = 0; i < 4; ++i)
+						{
+							PxU32 index = reducedPatch.contactPoints[i];
+							separation[i] = mOriginalContacts[index].separation - PXS_SEPARATION_TOLERANCE;
+							deepestInd[i] = index;
+						}
+
+						ContactPatch* tmpPatch = mIntermediatePatchesPtrs[a];
+						while(tmpPatch)
+						{
+							for(PxU32 b = 0; b < tmpPatch->stride; ++b)
+							{
+								PxContactPoint& point = mOriginalContacts[tmpPatch->startIndex + b];
+								
+								PxReal distance = PX_MAX_REAL;
+								PxU32 index = 0;
+								for(PxU32 c = 0; c < 4; ++c)
+								{
+									PxVec3 dif = mOriginalContacts[reducedPatch.contactPoints[c]].point - point.point;
+									PxReal d = dif.magnitudeSquared();
+									if(distance > d)
+									{
+										distance = d;
+										index = c;
+									}
+								}
+								if(separation[index] > point.separation)
+								{
+									deepestInd[index] = tmpPatch->startIndex+b;
+									separation[index] = point.separation;
+								}
+
+							}
+							tmpPatch = tmpPatch->mNextPatch;
+						}
+
+						bool chosen[PxContactBuffer::MAX_CONTACTS];
+						PxMemZero(chosen, sizeof(chosen));
+						for(PxU32 i = 0; i < 4; ++i)
+						{
+							reducedPatch.contactPoints[i] = deepestInd[i];
+							chosen[deepestInd[i]] = true;
+						}						
+						
+						for(PxU32 i = 4; i < MaxContactsPerPatch; ++i)
+						{
+							separation[i] = PX_MAX_REAL;
+							deepestInd[i] = 0;
+						}
+						tmpPatch = mIntermediatePatchesPtrs[a];
+						while(tmpPatch)
+						{
+							for(PxU32 b = 0; b < tmpPatch->stride; ++b)
+							{
+								if(!chosen[tmpPatch->startIndex+b])
+								{
+									PxContactPoint& point = mOriginalContacts[tmpPatch->startIndex + b];	
+									for(PxU32 j = 4; j < MaxContactsPerPatch; ++j)
+									{
+										if(point.separation < separation[j])
+										{
+											for(PxU32 k = MaxContactsPerPatch-1; k > j; --k)
+											{
+												separation[k] = separation[k-1];
+												deepestInd[k] = deepestInd[k-1];
+											}
+											separation[j] = point.separation;
+											deepestInd[j] = tmpPatch->startIndex+b;
+											break;
+										}
+									}
+								}
+							}
+							tmpPatch = tmpPatch->mNextPatch;
+						}
+
+						for(PxU32 i = 4; i < MaxContactsPerPatch; ++i)
+						{
+							reducedPatch.contactPoints[i] = deepestInd[i];
+						}
+
+						reducedPatch.numContactPoints = MaxContactsPerPatch;
+					}
+				}
+			}
+			mNumPatches = numReducedPatches;
+		}
+
+	};
+}
+
+}
+
+
+#endif
--- a/engine/third_party/physx/source/lowleveldynamics/src/DyCorrelationBuffer.h
+++ b/engine/third_party/physx/source/lowleveldynamics/src/DyCorrelationBuffer.h
@@ -0,0 +1,102 @@
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+//  * Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//  * Redistributions in binary form must reproduce the above copyright
+//    notice, this list of conditions and the following disclaimer in the
+//    documentation and/or other materials provided with the distribution.
+//  * Neither the name of NVIDIA CORPORATION nor the names of its
+//    contributors may be used to endorse or promote products derived
+//    from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.  
+
+#ifndef DY_CORRELATION_BUFFER_H
+#define DY_CORRELATION_BUFFER_H
+
+#include "foundation/PxSimpleTypes.h"
+#include "foundation/PxVec3.h"
+#include "foundation/PxTransform.h"
+#include "foundation/PxBounds3.h"
+#include "geomutils/PxContactBuffer.h"
+
+#include "PxvConfig.h"
+#include "DyFrictionPatch.h"
+
+namespace physx
+{
+namespace Dy
+{
+struct CorrelationBuffer
+{
+	static const PxU32 MAX_FRICTION_PATCHES = 32;
+	static const PxU16 LIST_END = 0xffff;
+
+	struct ContactPatchData
+	{
+		PxBounds3	patchBounds;
+		PxU32		boundsPadding;
+
+		PxReal		staticFriction;
+		PxReal		dynamicFriction;
+		PxReal		restitution;
+		PxU16		start;
+		PxU16		next;
+		PxU8		flags;
+		PxU8		count;
+	};
+
+	// we can have as many contact patches as contacts, unfortunately
+	ContactPatchData	PX_ALIGN(16, contactPatches[PxContactBuffer::MAX_CONTACTS]);
+
+	FrictionPatch		PX_ALIGN(16, frictionPatches[MAX_FRICTION_PATCHES]);
+	PxVec3				PX_ALIGN(16, frictionPatchWorldNormal[MAX_FRICTION_PATCHES]);
+	PxBounds3			patchBounds[MAX_FRICTION_PATCHES];
+
+	PxU32				frictionPatchContactCounts[MAX_FRICTION_PATCHES];
+	PxU32				correlationListHeads[MAX_FRICTION_PATCHES+1];
+
+	// contact IDs are only used to identify auxiliary contact data when velocity
+	// targets have been set. 
+	PxU16				contactID[MAX_FRICTION_PATCHES][2];
+
+	PxU32				contactPatchCount, frictionPatchCount;
+};
+
+bool createContactPatches(CorrelationBuffer& fb, const PxContactPoint* cb, PxU32 contactCount, PxReal normalTolerance);
+
+bool correlatePatches(CorrelationBuffer& fb, 
+					  const PxContactPoint* cb,
+					  const PxTransform& bodyFrame0,
+					  const PxTransform& bodyFrame1,
+					  PxReal normalTolerance,
+					  PxU32 startContactPatchIndex,
+					  PxU32 startFrictionPatchIndex);
+
+void growPatches(CorrelationBuffer& fb,
+				 const PxContactPoint* buffer,
+				 const PxTransform& bodyFrame0,
+				 const PxTransform& bodyFrame1,
+				 PxU32 frictionPatchStartIndex,
+				 PxReal frictionOffsetThreshold);
+
+}
+
+}
+
+#endif
--- a/engine/third_party/physx/source/lowleveldynamics/src/DyDynamics.cpp
+++ b/engine/third_party/physx/source/lowleveldynamics/src/DyDynamics.cpp
--- a/engine/third_party/physx/source/lowleveldynamics/src/DyDynamics.h
+++ b/engine/third_party/physx/source/lowleveldynamics/src/DyDynamics.h
@@ -0,0 +1,270 @@
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+//  * Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//  * Redistributions in binary form must reproduce the above copyright
+//    notice, this list of conditions and the following disclaimer in the
+//    documentation and/or other materials provided with the distribution.
+//  * Neither the name of NVIDIA CORPORATION nor the names of its
+//    contributors may be used to endorse or promote products derived
+//    from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.  
+
+#ifndef DY_DYNAMICS_H
+#define DY_DYNAMICS_H
+
+#include "DyDynamicsBase.h"
+#include "PxvConfig.h"
+#include "CmTask.h"
+#include "CmPool.h"
+#include "PxcThreadCoherentCache.h"
+#include "PxcConstraintBlockStream.h"
+#include "DySolverBody.h"
+
+namespace physx
+{
+class PxsRigidBody;
+
+struct PxsBodyCore;
+class PxsIslandIndices;
+struct PxsIndexedInteraction;
+struct PxsIndexedContactManager;
+struct PxSolverConstraintDesc;
+
+namespace Cm
+{
+	class SpatialVector;
+}
+
+namespace Dy
+{
+	struct SolverIslandParams;
+	class DynamicsContext;
+
+#define SOLVER_PARALLEL_METHOD_ARGS	\
+	DynamicsContext&	context,	\
+	SolverIslandParams& params,		\
+	IG::IslandSim& islandSim
+
+/**
+\brief Solver body pool (array) that enforces 128-byte alignment for base address of array.
+\note This reduces cache misses on platforms with 128-byte-size cache lines by aligning the start of the array to the beginning of a cache line.
+*/
+class SolverBodyPool : public PxArray<PxSolverBody, PxAlignedAllocator<128, PxReflectionAllocator<PxSolverBody> > > 
+{ 
+	PX_NOCOPY(SolverBodyPool)
+public:
+	SolverBodyPool() {}
+};
+
+/**
+\brief Solver body data pool (array) that enforces 128-byte alignment for base address of array.
+\note This reduces cache misses on platforms with 128-byte-size cache lines by aligning the start of the array to the beginning of a cache line.
+*/
+class SolverBodyDataPool : public PxArray<PxSolverBodyData, PxAlignedAllocator<128, PxReflectionAllocator<PxSolverBodyData> > >
+{
+	PX_NOCOPY(SolverBodyDataPool)
+public:
+	SolverBodyDataPool() {}
+};
+
+class SolverConstraintDescPool : public PxArray<PxSolverConstraintDesc, PxAlignedAllocator<128, PxReflectionAllocator<PxSolverConstraintDesc> > >
+{
+	PX_NOCOPY(SolverConstraintDescPool)
+public:
+	SolverConstraintDescPool() { }
+};
+
+/**
+\brief Encapsulates an island's context
+*/
+
+struct IslandContext
+{
+	//The thread context for this island (set in in the island start task, released in the island end task)
+	ThreadContext*		mThreadContext;
+	PxsIslandIndices	mCounts;
+};
+
+/**
+\brief Encapsules the data used by the constraint solver.
+*/
+
+#if PX_VC 
+    #pragma warning(push)
+	#pragma warning( disable : 4324 ) // Padding was added at the end of a structure because of a __declspec(align) value.
+#endif
+
+class DynamicsContext : public DynamicsContextBase
+{
+	PX_NOCOPY(DynamicsContext)
+public:
+	
+										DynamicsContext(PxcNpMemBlockPool* memBlockPool,
+														PxcScratchAllocator& scratchAllocator,
+														Cm::FlushPool& taskPool,
+														PxvSimStats& simStats,
+														PxTaskManager* taskManager,
+														PxVirtualAllocatorCallback* allocatorCallback,
+														PxsMaterialManager* materialManager,
+														IG::SimpleIslandManager& islandManager,
+														PxU64 contextID,
+														bool enableStabilization,
+														bool useEnhancedDeterminism,
+														PxReal maxBiasCoefficient,
+														bool frictionEveryIteration,
+														PxReal lengthScale,
+														bool isResidualReportingEnabled
+														);
+
+	virtual								~DynamicsContext();
+
+	// Context
+	virtual	void						destroy()	PX_OVERRIDE;
+	virtual void						update(	Cm::FlushPool& flushPool, PxBaseTask* continuation, PxBaseTask* postPartitioningTask, PxBaseTask* lostTouchTask,
+												PxvNphaseImplementationContext* nPhase, PxU32 maxPatchesPerCM, PxU32 maxArticulationLinks, PxReal dt, const PxVec3& gravity, PxBitMapPinned& changedHandleMap)	PX_OVERRIDE;
+	virtual void						mergeResults()	PX_OVERRIDE;
+	virtual void						setSimulationController(PxsSimulationController* simulationController )	PX_OVERRIDE	{ mSimulationController = simulationController; }
+	virtual PxSolverType::Enum			getSolverType()	const	PX_OVERRIDE	{ return PxSolverType::ePGS;	}
+	//~Context
+
+					void				updatePostKinematic(IG::SimpleIslandManager& simpleIslandManager, PxBaseTask* continuation, PxBaseTask* lostTouchTask, PxU32 maxLinks);
+
+	PX_FORCE_INLINE bool				solveFrictionEveryIteration() const { return mSolveFrictionEveryIteration; }
+
+protected:
+
+#if PX_ENABLE_SIM_STATS
+					void				addThreadStats(const ThreadContext::ThreadSimStats& stats);
+#else
+					PX_CATCH_UNDEFINED_ENABLE_SIM_STATS
+#endif
+
+	// Solver helper-methods
+	/**
+	\brief Computes the unconstrained velocity for a given PxsRigidBody
+	\param[in] atom The PxsRigidBody
+	*/
+	void								computeUnconstrainedVelocity(PxsRigidBody* atom)	const;
+
+	void								setDescFromIndices_Contacts(PxSolverConstraintDesc& desc, const IG::IslandSim& islandSim,
+																	const PxsIndexedInteraction& constraint, PxU32 solverBodyOffset);
+
+	void								setDescFromIndices_Constraints(	PxSolverConstraintDesc& desc, const IG::IslandSim& islandSim, IG::EdgeIndex edgeIndex,
+																		const PxU32* bodyRemapTable, PxU32 solverBodyOffset);
+
+	/**
+	\brief Compute the unconstrained velocity for set of bodies in parallel. This function may spawn additional tasks.
+	\param[in] dt The timestep
+	\param[in] bodyArray The array of body cores
+	\param[in] originalBodyArray The array of PxsRigidBody
+	\param[in] nodeIndexArray The array of island node index
+	\param[in] bodyCount The number of bodies
+	\param[out] solverBodyPool The pool of solver bodies. These are synced with the corresponding body in bodyArray.
+	\param[out] solverBodyDataPool The pool of solver body data. These are synced with the corresponding body in bodyArray
+	\param[out] motionVelocityArray The motion velocities for the bodies
+	\param[out] maxSolverPositionIterations The maximum number of position iterations requested by any body in the island
+	\param[out] maxSolverVelocityIterations The maximum number of velocity iterations requested by any body in the island
+	\param[out] integrateTask The continuation task for any tasks spawned by this function.
+	*/
+	void								preIntegrationParallel(
+											   PxF32 dt,
+											   PxsBodyCore*const* bodyArray,					// INOUT: core body attributes
+											   PxsRigidBody*const* originalBodyArray,			// IN: original body atom names
+											   PxU32 const* nodeIndexArray,						// IN: island node index
+											   PxU32 bodyCount,									// IN: body count
+											   PxSolverBody* solverBodyPool,					// IN: solver atom pool (space preallocated)
+											   PxSolverBodyData* solverBodyDataPool,
+											   Cm::SpatialVector* motionVelocityArray,			// OUT: motion velocities
+											   PxU32& maxSolverPositionIterations,
+											   PxU32& maxSolverVelocityIterations,
+											   PxBaseTask& integrateTask
+											   );
+
+	/**
+	\brief Solves an island in parallel.
+
+	\param[in] params Solver parameter structure
+	*/
+
+	void								solveParallel(SolverIslandParams& params, IG::IslandSim& islandSim, Cm::SpatialVectorF* deltaV, Dy::ErrorAccumulatorEx* errorAccumulator);
+
+	void								integrateCoreParallel(SolverIslandParams& params, Cm::SpatialVectorF* deltaV, IG::IslandSim& islandSim);
+
+	/**
+	\brief Body to represent the world static body.
+	*/
+	PX_ALIGN(16, PxSolverBody			mWorldSolverBody);
+	/**
+	\brief Body data to represent the world static body.
+	*/
+	PX_ALIGN(16, PxSolverBodyData		mWorldSolverBodyData);
+
+	/**
+	\brief Solver constraint desc array
+	*/
+	SolverConstraintDescPool	mSolverConstraintDescPool;
+
+	/**
+	\brief Ordered solver constraint desc array (after partitioning)
+	*/
+	SolverConstraintDescPool	mOrderedSolverConstraintDescPool;
+
+	/**
+	\brief A temporary array of constraint descs used for partitioning
+	*/
+	SolverConstraintDescPool	mTempSolverConstraintDescPool;
+
+	/**
+	\brief Global pool for solver bodies. Kinematic bodies are at the start, and then dynamic bodies
+	*/
+	SolverBodyPool			mSolverBodyPool;
+	/**
+	\brief Global pool for solver body data. Kinematic bodies are at the start, and then dynamic bodies
+	*/
+	SolverBodyDataPool		mSolverBodyDataPool;
+
+private:
+	const bool	mSolveFrictionEveryIteration;
+
+	protected:
+
+	friend class PxsSolverStartTask;
+	friend class PxsSolverAticulationsTask;
+	friend class PxsSolverSetupConstraintsTask;
+	friend class PxsSolverCreateFinalizeConstraintsTask;	
+	friend class PxsSolverConstraintPartitionTask;
+	friend class PxsSolverSetupSolveTask;
+	friend class PxsSolverIntegrateTask;
+	friend class PxsSolverEndTask;
+	friend class PxsSolverConstraintPostProcessTask;
+	friend class PxsForceThresholdTask;
+	friend class SolverArticulationUpdateTask;
+
+	friend void solveParallel(SOLVER_PARALLEL_METHOD_ARGS);
+};
+
+#if PX_VC 
+    #pragma warning(pop)
+#endif
+
+}
+}
+
+#endif
--- a/engine/third_party/physx/source/lowleveldynamics/src/DyDynamicsBase.cpp
+++ b/engine/third_party/physx/source/lowleveldynamics/src/DyDynamicsBase.cpp
@@ -0,0 +1,127 @@
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+//  * Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//  * Redistributions in binary form must reproduce the above copyright
+//    notice, this list of conditions and the following disclaimer in the
+//    documentation and/or other materials provided with the distribution.
+//  * Neither the name of NVIDIA CORPORATION nor the names of its
+//    contributors may be used to endorse or promote products derived
+//    from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.  
+
+#include "DyDynamicsBase.h"
+#include "PxsIslandSim.h"
+#include "common/PxProfileZone.h"
+
+using namespace physx;
+using namespace Dy;
+
+DynamicsContextBase::DynamicsContextBase(
+	PxcNpMemBlockPool* memBlockPool,
+	Cm::FlushPool& taskPool,
+	PxvSimStats& simStats,
+	PxVirtualAllocatorCallback* allocatorCallback,
+	PxsMaterialManager* materialManager,
+	IG::SimpleIslandManager& islandManager,
+	PxU64 contextID,
+	PxReal maxBiasCoefficient,
+	PxReal lengthScale,
+	bool enableStabilization,
+	bool useEnhancedDeterminism,
+	bool isResidualReportingEnabled
+	) :
+	Dy::Context			(islandManager, allocatorCallback, simStats, enableStabilization, useEnhancedDeterminism, maxBiasCoefficient, lengthScale, contextID, isResidualReportingEnabled),
+	mThreadContextPool	(memBlockPool),
+	mMaterialManager	(materialManager),
+	mTaskPool			(taskPool),
+	mKinematicCount		(0),
+	mThresholdStreamOut	(0),
+	mCurrentIndex		(0)
+{
+}
+
+DynamicsContextBase::~DynamicsContextBase()
+{
+}
+
+void DynamicsContextBase::resetThreadContexts()
+{
+	PxcThreadCoherentCacheIterator<ThreadContext, PxcNpMemBlockPool> threadContextIt(mThreadContextPool);
+	ThreadContext* threadContext = threadContextIt.getNext();
+
+	while(threadContext != NULL)
+	{
+		threadContext->reset();
+		threadContext = threadContextIt.getNext();
+	}
+}
+
+PxU32 DynamicsContextBase::reserveSharedSolverConstraintsArrays(const IG::IslandSim& islandSim, PxU32 maxArticulationLinks)
+{
+	PX_PROFILE_ZONE("reserveSharedSolverConstraintsArrays", mContextID);
+
+	const PxU32 bodyCount = islandSim.getNbActiveNodes(IG::Node::eRIGID_BODY_TYPE);
+
+	const PxU32 numArtics = islandSim.getNbActiveNodes(IG::Node::eARTICULATION_TYPE);
+
+	const PxU32 numArticulationConstraints = numArtics * maxArticulationLinks; //Just allocate enough memory to fit worst-case maximum size articulations...
+
+	const PxU32 nbActiveContactManagers = islandSim.getNbActiveEdges(IG::Edge::eCONTACT_MANAGER);
+	const PxU32 nbActiveConstraints = islandSim.getNbActiveEdges(IG::Edge::eCONSTRAINT);
+
+	const PxU32 totalConstraintCount = nbActiveConstraints + nbActiveContactManagers + numArticulationConstraints;
+
+	mContactConstraintBatchHeaders.forceSize_Unsafe(0);
+	mContactConstraintBatchHeaders.reserve((totalConstraintCount + 63) & (~63));
+	mContactConstraintBatchHeaders.forceSize_Unsafe(totalConstraintCount);
+
+	mContactList.forceSize_Unsafe(0);
+	mContactList.reserve((nbActiveContactManagers + 63u) & (~63u));
+	mContactList.forceSize_Unsafe(nbActiveContactManagers);
+
+	mMotionVelocityArray.forceSize_Unsafe(0);
+	mMotionVelocityArray.reserve((bodyCount + 63u) & (~63u));
+	mMotionVelocityArray.forceSize_Unsafe(bodyCount);
+
+	mBodyCoreArray.forceSize_Unsafe(0);
+	mBodyCoreArray.reserve((bodyCount + 63u) & (~63u));
+	mBodyCoreArray.forceSize_Unsafe(bodyCount);
+
+	mRigidBodyArray.forceSize_Unsafe(0);
+	mRigidBodyArray.reserve((bodyCount + 63u) & (~63u));
+	mRigidBodyArray.forceSize_Unsafe(bodyCount);
+
+	mArticulationArray.forceSize_Unsafe(0);
+	mArticulationArray.reserve((numArtics + 63u) & (~63u));
+	mArticulationArray.forceSize_Unsafe(numArtics);
+
+	mNodeIndexArray.forceSize_Unsafe(0);
+	mNodeIndexArray.reserve((bodyCount + 63u) & (~63u));
+	mNodeIndexArray.forceSize_Unsafe(bodyCount);
+
+	ThresholdStream& stream = getThresholdStream();
+	stream.forceSize_Unsafe(0);
+	stream.reserve(PxNextPowerOfTwo(nbActiveContactManagers != 0 ? nbActiveContactManagers - 1 : nbActiveContactManagers));
+
+	//flip exceeded force threshold buffer
+	mCurrentIndex = 1 - mCurrentIndex;
+
+	return totalConstraintCount;
+}
--- a/engine/third_party/physx/source/lowleveldynamics/src/DyDynamicsBase.h
+++ b/engine/third_party/physx/source/lowleveldynamics/src/DyDynamicsBase.h
@@ -0,0 +1,122 @@
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+//  * Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//  * Redistributions in binary form must reproduce the above copyright
+//    notice, this list of conditions and the following disclaimer in the
+//    documentation and/or other materials provided with the distribution.
+//  * Neither the name of NVIDIA CORPORATION nor the names of its
+//    contributors may be used to endorse or promote products derived
+//    from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.  
+
+#ifndef DY_DYNAMICS_BASE_H
+#define DY_DYNAMICS_BASE_H
+
+#include "DyContext.h"
+#include "DyThreadContext.h"
+#include "PxvNphaseImplementationContext.h"
+#include "PxsIslandManagerTypes.h"
+#include "solver/PxSolverDefs.h"
+
+namespace physx
+{
+namespace Cm
+{
+	class FlushPool;
+}
+
+namespace IG
+{
+	class SimpleIslandManager;
+}
+
+namespace Dy
+{
+
+// PT: base class containing code and data shared between PGS and TGS. Ideally this would just be named "DynamicsContext" and the PGS
+// context would have been renamed "DynamicsPGSContext" (to match DynamicsTGSContext) but let's limit the gratuitous changes for now.
+class DynamicsContextBase : public Context
+{
+	PX_NOCOPY(DynamicsContextBase)
+public:
+			DynamicsContextBase(PxcNpMemBlockPool* memBlockPool,
+								Cm::FlushPool& taskPool,
+								PxvSimStats& simStats,
+								PxVirtualAllocatorCallback* allocatorCallback,
+								PxsMaterialManager* materialManager,
+								IG::SimpleIslandManager& islandManager,
+								PxU64 contextID,
+								PxReal maxBiasCoefficient,
+								PxReal lengthScale,
+								bool enableStabilization,
+								bool useEnhancedDeterminism,
+								bool isResidualReportingEnabled
+								);
+
+	virtual	~DynamicsContextBase();
+
+	/**
+	\brief Allocates and returns a thread context object.
+	\return A thread context.
+	*/
+	PX_FORCE_INLINE ThreadContext*		getThreadContext()	{ return mThreadContextPool.get();	}
+
+	/**
+	\brief Returns a thread context to the thread context pool.
+	\param[in] context The thread context to return to the thread context pool.
+	*/
+					void				putThreadContext(ThreadContext* context)	{ mThreadContextPool.put(context);	}
+
+	PX_FORCE_INLINE ThresholdStream&	getThresholdStream()			{ return *mThresholdStream; }
+	PX_FORCE_INLINE PxvSimStats&		getSimStats()					{ return mSimStats;			}
+	PX_FORCE_INLINE Cm::FlushPool&		getTaskPool()					{ return mTaskPool;			}
+	PX_FORCE_INLINE	PxU32				getKinematicCount()		const	{ return mKinematicCount;	}
+
+	PxcThreadCoherentCache<ThreadContext, PxcNpMemBlockPool> mThreadContextPool;	// A thread context pool
+
+	PxsMaterialManager*				mMaterialManager;
+	Cm::FlushPool&					mTaskPool;
+	PxsContactManagerOutputIterator	mOutputIterator;
+
+	PxArray<PxConstraintBatchHeader>	mContactConstraintBatchHeaders;	// An array of contact constraint batch headers
+	PxArray<Cm::SpatialVector>			mMotionVelocityArray;			// Array of motion velocities for all bodies in the scene.
+	PxArray<PxsBodyCore*>				mBodyCoreArray;					// Array of body core pointers for all bodies in the scene.
+	PxArray<PxsRigidBody*>				mRigidBodyArray;				// Array of rigid body pointers for all bodies in the scene.
+	PxArray<FeatherstoneArticulation*>	mArticulationArray;				// Array of articulation pointers for all articulations in the scene.
+
+	ThresholdStream*					mExceededForceThresholdStream[2]; // this store previous and current exceeded force thresholdStream	
+	PxArray<PxU32>						mExceededForceThresholdStreamMask;
+	PxArray<PxU32>						mSolverBodyRemapTable;	// Remaps from the "active island" index to the index within a solver island
+	PxArray<PxU32>						mNodeIndexArray;		// island node index
+	PxArray<PxsIndexedContactManager>	mContactList;
+
+	PxU32	mKinematicCount;		// The total number of kinematic bodies in the scene
+	PxI32	mThresholdStreamOut;	// Atomic counter for the number of threshold stream elements.
+	PxU32	mCurrentIndex;			// this is the index point to the current exceeded force threshold stream
+
+protected:
+	void	resetThreadContexts();
+	PxU32	reserveSharedSolverConstraintsArrays(const IG::IslandSim& islandSim, PxU32 maxArticulationLinks);
+};
+
+}
+}
+
+#endif
--- a/engine/third_party/physx/source/lowleveldynamics/src/DyFeatherstoneArticulation.cpp
+++ b/engine/third_party/physx/source/lowleveldynamics/src/DyFeatherstoneArticulation.cpp
--- a/engine/third_party/physx/source/lowleveldynamics/src/DyFeatherstoneArticulationLink.h
+++ b/engine/third_party/physx/source/lowleveldynamics/src/DyFeatherstoneArticulationLink.h
@@ -0,0 +1,63 @@
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+//  * Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//  * Redistributions in binary form must reproduce the above copyright
+//    notice, this list of conditions and the following disclaimer in the
+//    documentation and/or other materials provided with the distribution.
+//  * Neither the name of NVIDIA CORPORATION nor the names of its
+//    contributors may be used to endorse or promote products derived
+//    from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.  
+
+#ifndef DY_FEATHERSTONE_ARTICULATION_LINK_H
+#define DY_FEATHERSTONE_ARTICULATION_LINK_H
+
+#include "foundation/PxVec3.h"
+#include "foundation/PxQuat.h"
+#include "foundation/PxTransform.h"
+#include "foundation/PxVecMath.h"
+#include "CmUtils.h"
+#include "CmSpatialVector.h"
+#include "DyVArticulation.h"
+#include "DyFeatherstoneArticulationUtils.h"
+
+namespace physx
+{
+	namespace Dy
+	{
+
+		class ArticulationLinkData
+		{
+			const static PxU32 MaxJointRows = 3;
+		public:
+			ArticulationLinkData()
+			{
+				maxPenBias = 0.f;
+			}
+
+			PxVec3							childToBase;
+			PxReal							maxPenBias;
+
+		};
+
+	}//namespace Dy
+}
+
+#endif
--- a/engine/third_party/physx/source/lowleveldynamics/src/DyFeatherstoneForwardDynamic.cpp
+++ b/engine/third_party/physx/source/lowleveldynamics/src/DyFeatherstoneForwardDynamic.cpp
--- a/engine/third_party/physx/source/lowleveldynamics/src/DyFeatherstoneInverseDynamic.cpp
+++ b/engine/third_party/physx/source/lowleveldynamics/src/DyFeatherstoneInverseDynamic.cpp
--- a/engine/third_party/physx/source/lowleveldynamics/src/DyFrictionCorrelation.cpp
+++ b/engine/third_party/physx/source/lowleveldynamics/src/DyFrictionCorrelation.cpp
@@ -0,0 +1,295 @@
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+//  * Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//  * Redistributions in binary form must reproduce the above copyright
+//    notice, this list of conditions and the following disclaimer in the
+//    documentation and/or other materials provided with the distribution.
+//  * Neither the name of NVIDIA CORPORATION nor the names of its
+//    contributors may be used to endorse or promote products derived
+//    from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.  
+
+#include "PxvConfig.h"
+#include "DyCorrelationBuffer.h"
+#include "PxsMaterialManager.h"
+#include "foundation/PxUtilities.h"
+#include "foundation/PxBounds3.h"
+#include "foundation/PxVecMath.h"
+#include "GuBounds.h"
+
+using namespace physx;
+using namespace aos;
+
+namespace physx
+{
+namespace Dy
+{
+static PX_FORCE_INLINE void initContactPatch(CorrelationBuffer::ContactPatchData& patch, PxU16 index, PxReal restitution, PxReal staticFriction, PxReal dynamicFriction,
+	PxU8 flags)
+{
+	patch.start = index;
+	patch.count = 1;
+	patch.next = 0;
+	patch.flags = flags;
+	patch.restitution = restitution;
+	patch.staticFriction = staticFriction;
+	patch.dynamicFriction = dynamicFriction;
+}
+
+bool createContactPatches(CorrelationBuffer& fb, const PxContactPoint* cb, PxU32 contactCount, PxReal normalTolerance)
+{
+	// PT: this rewritten version below doesn't have LHS
+
+	PxU32 contactPatchCount = fb.contactPatchCount;
+	if(contactPatchCount == PxContactBuffer::MAX_CONTACTS)
+		return false;
+	if(contactCount>0)
+	{
+		CorrelationBuffer::ContactPatchData* PX_RESTRICT currentPatchData = fb.contactPatches + contactPatchCount;
+		const PxContactPoint* PX_RESTRICT contacts = cb;
+
+		initContactPatch(fb.contactPatches[contactPatchCount++], PxTo16(0), contacts[0].restitution, 
+			contacts[0].staticFriction, contacts[0].dynamicFriction, PxU8(contacts[0].materialFlags));
+
+		Vec4V minV = V4LoadA(&contacts[0].point.x);
+		Vec4V maxV = minV;
+
+		PxU32 patchIndex = 0;
+		PxU8 count = 1;
+
+		for (PxU32 i = 1; i<contactCount; i++)
+		{
+			const PxContactPoint& curContact = contacts[i];
+			const PxContactPoint& preContact = contacts[patchIndex];
+
+			if(curContact.staticFriction == preContact.staticFriction
+				&& curContact.dynamicFriction == preContact.dynamicFriction
+				&& curContact.restitution == preContact.restitution
+				&& curContact.normal.dot(preContact.normal)>=normalTolerance)
+			{
+				const Vec4V ptV = V4LoadA(&curContact.point.x);
+				minV = V4Min(minV, ptV);
+				maxV = V4Max(maxV, ptV);
+
+				count++;
+			}
+			else
+			{
+				if(contactPatchCount == PxContactBuffer::MAX_CONTACTS)
+					return false;
+				patchIndex = i;
+				currentPatchData->count = count;
+				count = 1;
+				StoreBounds(currentPatchData->patchBounds, minV, maxV);
+				currentPatchData = fb.contactPatches + contactPatchCount;
+
+				initContactPatch(fb.contactPatches[contactPatchCount++], PxTo16(i), curContact.restitution,
+					curContact.staticFriction, curContact.dynamicFriction, PxU8(curContact.materialFlags));
+
+				minV = V4LoadA(&curContact.point.x);
+				maxV = minV;
+			}
+		}
+		if(count!=1)
+			currentPatchData->count = count;
+
+		StoreBounds(currentPatchData->patchBounds, minV, maxV);
+	}
+	fb.contactPatchCount = contactPatchCount;
+	return true;
+}
+
+static PX_FORCE_INLINE void initFrictionPatch(FrictionPatch& p, const PxVec3& worldNormal, const PxTransform& body0Pose, const PxTransform& body1Pose, 
+	PxReal restitution, PxReal staticFriction, PxReal dynamicFriction, PxU8 materialFlags)
+{
+	p.body0Normal = body0Pose.rotateInv(worldNormal);
+	p.body1Normal = body1Pose.rotateInv(worldNormal);
+	p.relativeQuat = body0Pose.q.getConjugate() * body1Pose.q;
+	p.anchorCount = 0;
+	p.broken = 0;
+	p.staticFriction = staticFriction;
+	p.dynamicFriction = dynamicFriction;
+	p.restitution = restitution;
+	p.materialFlags = materialFlags;
+}
+
+bool correlatePatches(CorrelationBuffer& fb, 
+					  const PxContactPoint* cb,
+					  const PxTransform& bodyFrame0,
+					  const PxTransform& bodyFrame1,
+					  PxReal normalTolerance,
+					  PxU32 startContactPatchIndex,
+					  PxU32 startFrictionPatchIndex)
+{
+	bool overflow = false;
+	PxU32 frictionPatchCount = fb.frictionPatchCount;
+
+	for(PxU32 i=startContactPatchIndex;i<fb.contactPatchCount;i++)
+	{
+		CorrelationBuffer::ContactPatchData &c = fb.contactPatches[i];
+		const PxVec3 patchNormal = cb[c.start].normal;
+
+		PxU32 j=startFrictionPatchIndex;
+		for(;j<frictionPatchCount && ((patchNormal.dot(fb.frictionPatchWorldNormal[j]) < normalTolerance) 
+			|| fb.frictionPatches[j].restitution != c.restitution|| fb.frictionPatches[j].staticFriction != c.staticFriction || 
+			fb.frictionPatches[j].dynamicFriction != c.dynamicFriction);j++)
+			;
+
+		if(j==frictionPatchCount)
+		{
+			overflow |= j==CorrelationBuffer::MAX_FRICTION_PATCHES;
+			if(overflow)
+				continue;
+
+			initFrictionPatch(fb.frictionPatches[frictionPatchCount], patchNormal, bodyFrame0, bodyFrame1, c.restitution, c.staticFriction, c.dynamicFriction, c.flags);
+			fb.frictionPatchWorldNormal[j] = patchNormal;
+			fb.frictionPatchContactCounts[frictionPatchCount] = c.count;
+			fb.patchBounds[frictionPatchCount] = c.patchBounds;
+			fb.contactID[frictionPatchCount][0] = 0xffff;
+			fb.contactID[frictionPatchCount++][1] = 0xffff;
+			c.next = CorrelationBuffer::LIST_END;
+		}
+		else
+		{
+			fb.patchBounds[j].include(c.patchBounds);
+			fb.frictionPatchContactCounts[j] += c.count;
+			c.next = PxTo16(fb.correlationListHeads[j]);
+		}
+
+		fb.correlationListHeads[j] = i;
+	}
+
+	fb.frictionPatchCount = frictionPatchCount;
+
+	return overflow;
+}
+
+// run over the friction patches, trying to find two anchors per patch. If we already have
+// anchors that are close, we keep them, which gives us persistent spring behavior
+
+void growPatches(CorrelationBuffer& fb,
+				 const PxContactPoint* cb,
+				 const PxTransform& bodyFrame0,
+				 const PxTransform& bodyFrame1,
+				 PxU32 frictionPatchStartIndex,
+				 PxReal frictionOffsetThreshold)
+{
+	for(PxU32 i=frictionPatchStartIndex;i<fb.frictionPatchCount;i++)
+	{
+		FrictionPatch& fp = fb.frictionPatches[i];
+
+		if (fp.anchorCount == 2 || fb.correlationListHeads[i] == CorrelationBuffer::LIST_END)
+		{
+			const PxReal frictionPatchDiagonalSq = fb.patchBounds[i].getDimensions().magnitudeSquared();
+			const PxReal anchorSqDistance = (fp.body0Anchors[0] - fp.body0Anchors[1]).magnitudeSquared();
+
+			//If the squared distance between the anchors is more than a quarter of the patch diagonal, we can keep, 
+			//otherwise the anchors are potentially clustered around a corner so force a rebuild of the patch
+			if (fb.frictionPatchContactCounts[i] == 0 || (anchorSqDistance * 4.f) >= frictionPatchDiagonalSq)
+				continue;
+
+			fp.anchorCount = 0;
+		}
+
+		PxVec3 worldAnchors[2];
+		PxU16 anchorCount = 0;
+		PxReal pointDistSq = 0.0f, dist0, dist1;
+
+		// if we have an anchor already, keep it
+		if(fp.anchorCount == 1)
+		{
+			worldAnchors[anchorCount++] = bodyFrame0.transform(fp.body0Anchors[0]);
+		}
+
+		const PxReal eps = 1e-8f;
+
+		for(PxU32 patch = fb.correlationListHeads[i]; 
+			patch!=CorrelationBuffer::LIST_END; 
+			patch = fb.contactPatches[patch].next)
+		{
+			CorrelationBuffer::ContactPatchData& cp = fb.contactPatches[patch];
+			for(PxU16 j=0;j<cp.count;j++)
+			{
+				const PxVec3& worldPoint = cb[cp.start+j].point;
+
+				if(cb[cp.start+j].separation < frictionOffsetThreshold)
+				{
+					switch(anchorCount)
+					{
+					case 0:
+						fb.contactID[i][0] = PxU16(cp.start+j);
+						worldAnchors[0] = worldPoint;
+						anchorCount++;
+						break;
+					case 1:
+						pointDistSq = (worldPoint-worldAnchors[0]).magnitudeSquared(); 
+						if (pointDistSq > eps)
+						{
+							fb.contactID[i][1] = PxU16(cp.start+j);
+							worldAnchors[1] = worldPoint;
+							anchorCount++;
+						}
+						break;
+					default: //case 2
+						dist0 = (worldPoint-worldAnchors[0]).magnitudeSquared();
+						dist1 = (worldPoint-worldAnchors[1]).magnitudeSquared();
+						if (dist0 > dist1)
+						{
+							if(dist0 > pointDistSq)
+							{
+								fb.contactID[i][1] = PxU16(cp.start+j);
+								worldAnchors[1] = worldPoint;
+								pointDistSq = dist0;
+							}
+						}
+						else if (dist1 > pointDistSq)
+						{
+							fb.contactID[i][0] = PxU16(cp.start+j);
+							worldAnchors[0] = worldPoint;
+							pointDistSq = dist1;
+						}
+					}
+				}
+			}
+		}
+
+		//PX_ASSERT(anchorCount > 0);
+
+		// add the new anchor(s) to the patch
+		for(PxU32 j = fp.anchorCount; j < anchorCount; j++)
+		{
+			fp.body0Anchors[j] = bodyFrame0.transformInv(worldAnchors[j]);
+			fp.body1Anchors[j] = bodyFrame1.transformInv(worldAnchors[j]);
+		}
+
+		// the block contact solver always reads at least one anchor per patch for performance reasons even if there are no valid patches,
+		// so we need to initialize this in the unexpected case that we have no anchors
+
+		if(anchorCount==0)
+			fp.body0Anchors[0] = fp.body1Anchors[0] = PxVec3(0);
+
+		fp.anchorCount = anchorCount;
+	}
+}
+
+}
+
+}
+
--- a/engine/third_party/physx/source/lowleveldynamics/src/DyFrictionPatch.h
+++ b/engine/third_party/physx/source/lowleveldynamics/src/DyFrictionPatch.h
@@ -0,0 +1,93 @@
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+//  * Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//  * Redistributions in binary form must reproduce the above copyright
+//    notice, this list of conditions and the following disclaimer in the
+//    documentation and/or other materials provided with the distribution.
+//  * Neither the name of NVIDIA CORPORATION nor the names of its
+//    contributors may be used to endorse or promote products derived
+//    from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.  
+
+#ifndef DY_FRICTION_PATCH_H
+#define DY_FRICTION_PATCH_H
+
+#include "foundation/PxSimpleTypes.h"
+#include "foundation/PxVec3.h"
+#include "foundation/PxIntrinsics.h"
+#include "PxvConfig.h"
+
+namespace physx
+{
+
+namespace Dy
+{
+
+struct FrictionPatch
+{
+	PxU8				broken;				// PT: must be first byte of struct, see "frictionBrokenWritebackByte"
+	PxU8				materialFlags;
+	PxU16				anchorCount;
+	PxReal				restitution;
+	PxReal				staticFriction;
+	PxReal				dynamicFriction;
+	PxVec3				body0Normal;
+	PxVec3				body1Normal;
+	PxVec3				body0Anchors[2];
+	PxVec3				body1Anchors[2];
+	PxQuat				relativeQuat;
+
+	PX_FORCE_INLINE	void	operator = (const FrictionPatch& other)
+	{
+		broken = other.broken;
+		materialFlags = other.materialFlags;
+		anchorCount = other.anchorCount;
+		body0Normal = other.body0Normal;
+		body1Normal = other.body1Normal;
+		body0Anchors[0] = other.body0Anchors[0];   
+		body0Anchors[1] = other.body0Anchors[1];
+		body1Anchors[0] = other.body1Anchors[0];
+		body1Anchors[1] = other.body1Anchors[1];
+		relativeQuat = other.relativeQuat;
+		restitution = other.restitution;
+		staticFriction = other.staticFriction;
+		dynamicFriction = other.dynamicFriction;
+	}
+
+	PX_FORCE_INLINE	void	prefetch()	const
+	{
+		// PT: TODO: revisit this... not very satisfying
+		PxPrefetchLine(this);
+		PxPrefetchLine(this, 128);
+		PxPrefetchLine(this, 256);
+	}
+};  
+
+// PT: ensure that we can safely read the body anchors with V4Loads
+PX_COMPILE_TIME_ASSERT(PX_OFFSET_OF(FrictionPatch, body0Anchors)+sizeof(FrictionPatch::body0Anchors) + 4 <= sizeof(FrictionPatch));
+PX_COMPILE_TIME_ASSERT(PX_OFFSET_OF(FrictionPatch, body1Anchors)+sizeof(FrictionPatch::body1Anchors) + 4 <= sizeof(FrictionPatch));
+
+//PX_COMPILE_TIME_ASSERT(sizeof(FrictionPatch)==80);
+
+}
+
+}
+
+#endif
--- a/engine/third_party/physx/source/lowleveldynamics/src/DyFrictionPatchStreamPair.h
+++ b/engine/third_party/physx/source/lowleveldynamics/src/DyFrictionPatchStreamPair.h
@@ -0,0 +1,125 @@
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+//  * Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//  * Redistributions in binary form must reproduce the above copyright
+//    notice, this list of conditions and the following disclaimer in the
+//    documentation and/or other materials provided with the distribution.
+//  * Neither the name of NVIDIA CORPORATION nor the names of its
+//    contributors may be used to endorse or promote products derived
+//    from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.  
+
+#ifndef DY_FRICTION_PATCH_STREAM_PAIR_H
+#define DY_FRICTION_PATCH_STREAM_PAIR_H
+
+#include "foundation/PxSimpleTypes.h"
+#include "PxvConfig.h"
+#include "foundation/PxMutex.h"
+#include "foundation/PxArray.h"
+
+// Each narrow phase thread has an input stream of friction patches from the
+// previous frame and an output stream of friction patches which will be
+// saved for next frame. The patches persist for exactly one frame at which
+// point they get thrown away.
+
+
+// There is a stream pair per thread. A contact callback reserves space
+// for its friction patches and gets a cookie in return that can stash
+// for next frame. Cookies are valid for one frame only.
+//
+// note that all friction patches reserved are guaranteed to be contiguous;
+// this might turn out to be a bit inefficient if we often have a large
+// number of friction patches
+
+#include "PxcNpMemBlockPool.h"
+
+namespace physx
+{
+
+class FrictionPatchStreamPair
+{
+public:
+	FrictionPatchStreamPair(PxcNpMemBlockPool& blockPool);
+
+	// reserve can fail and return null. Read should never fail
+	template<class FrictionPatch>
+	FrictionPatch*		reserve(const PxU32 size);
+
+	template<class FrictionPatch>
+	const FrictionPatch* findInputPatches(const PxU8* ptr) const;
+	void					reset();
+
+	PxcNpMemBlockPool& getBlockPool() { return mBlockPool;}
+private:
+	PxcNpMemBlockPool&	mBlockPool;
+	PxcNpMemBlock*		mBlock;
+	PxU32				mUsed;
+
+	FrictionPatchStreamPair& operator=(const FrictionPatchStreamPair&);
+};
+
+PX_FORCE_INLINE FrictionPatchStreamPair::FrictionPatchStreamPair(PxcNpMemBlockPool& blockPool):
+  mBlockPool(blockPool), mBlock(NULL), mUsed(0)
+{
+}
+
+PX_FORCE_INLINE void FrictionPatchStreamPair::reset()
+{
+	mBlock = NULL;
+	mUsed = 0;
+}
+
+// reserve can fail and return null. Read should never fail
+template <class FrictionPatch>
+FrictionPatch* FrictionPatchStreamPair::reserve(const PxU32 size)
+{
+	if(size>PxcNpMemBlock::SIZE)
+	{
+		return reinterpret_cast<FrictionPatch*>(-1);
+	}
+
+	PX_ASSERT(size <= PxcNpMemBlock::SIZE);
+
+	FrictionPatch* ptr = NULL;
+
+	if(mBlock == NULL || mUsed + size > PxcNpMemBlock::SIZE)
+	{
+		mBlock = mBlockPool.acquireFrictionBlock();
+		mUsed = 0;
+	}
+
+	if(mBlock)
+	{
+		ptr = reinterpret_cast<FrictionPatch*>(mBlock->data+mUsed);
+		mUsed += size;
+	}
+
+	return ptr;
+}
+
+template <class FrictionPatch>
+const FrictionPatch* FrictionPatchStreamPair::findInputPatches(const PxU8* ptr) const
+{
+	return reinterpret_cast<const FrictionPatch*>(ptr);
+}
+
+}
+
+#endif
--- a/engine/third_party/physx/source/lowleveldynamics/src/DyPGS.h
+++ b/engine/third_party/physx/source/lowleveldynamics/src/DyPGS.h
@@ -0,0 +1,59 @@
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+//  * Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//  * Redistributions in binary form must reproduce the above copyright
+//    notice, this list of conditions and the following disclaimer in the
+//    documentation and/or other materials provided with the distribution.
+//  * Neither the name of NVIDIA CORPORATION nor the names of its
+//    contributors may be used to endorse or promote products derived
+//    from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.  
+
+#ifndef DY_PGS_H
+#define DY_PGS_H
+
+#include "foundation/PxPreprocessor.h"
+#include "foundation/PxSimpleTypes.h"
+
+namespace physx
+{
+	struct PxSolverConstraintDesc;
+
+	namespace Dy
+	{
+		struct SolverContext;
+
+		// PT: using defines like we did in Gu (GU_OVERLAP_FUNC_PARAMS, etc). Additionally this gives a
+		// convenient way to find the PGS solver methods, which are scattered in different files and use
+		// the same function names as other functions (with a different signature).
+
+		#define DY_PGS_SOLVE_METHOD_PARAMS		const PxSolverConstraintDesc* desc, PxU32 constraintCount, SolverContext& cache
+
+		typedef void (*SolveBlockMethod)			(DY_PGS_SOLVE_METHOD_PARAMS);
+		typedef void (*SolveWriteBackBlockMethod)	(DY_PGS_SOLVE_METHOD_PARAMS);
+		typedef void (*WriteBackBlockMethod)		(DY_PGS_SOLVE_METHOD_PARAMS);
+
+		extern SolveBlockMethod				gVTableSolveBlock[];
+		extern SolveWriteBackBlockMethod	gVTableSolveWriteBackBlock[];
+		extern SolveBlockMethod				gVTableSolveConcludeBlock[];
+	}
+}
+
+#endif
--- a/engine/third_party/physx/source/lowleveldynamics/src/DyRigidBodyToSolverBody.cpp
+++ b/engine/third_party/physx/source/lowleveldynamics/src/DyRigidBodyToSolverBody.cpp
@@ -0,0 +1,113 @@
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+//  * Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//  * Redistributions in binary form must reproduce the above copyright
+//    notice, this list of conditions and the following disclaimer in the
+//    documentation and/or other materials provided with the distribution.
+//  * Neither the name of NVIDIA CORPORATION nor the names of its
+//    contributors may be used to endorse or promote products derived
+//    from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.  
+
+#include "CmUtils.h"
+#include "DySolverBody.h"
+#include "PxsRigidBody.h"
+#include "PxvDynamics.h"
+#include "foundation/PxSIMDHelpers.h"
+
+using namespace physx;
+
+// PT: TODO: SIMDify all this...
+void Dy::copyToSolverBodyData(const PxVec3& linearVelocity, const PxVec3& angularVelocity, PxReal invMass, const PxVec3& invInertia, const PxTransform& globalPose,
+	PxReal maxDepenetrationVelocity, PxReal maxContactImpulse, PxU32 nodeIndex, PxReal reportThreshold, PxSolverBodyData& data, PxU32 lockFlags,
+	PxReal dt, bool gyroscopicForces)
+{
+	data.nodeIndex = nodeIndex;
+
+	const PxVec3 safeSqrtInvInertia = computeSafeSqrtInertia(invInertia);
+
+	const PxMat33Padded rotation(globalPose.q);
+
+	Cm::transformInertiaTensor(safeSqrtInvInertia, rotation, data.sqrtInvInertia);
+
+	PxVec3 ang = angularVelocity;
+	PxVec3 lin = linearVelocity;
+
+	if (gyroscopicForces)
+	{
+		const PxVec3 localInertia(
+			invInertia.x == 0.f ? 0.f : 1.f / invInertia.x,
+			invInertia.y == 0.f ? 0.f : 1.f / invInertia.y,
+			invInertia.z == 0.f ? 0.f : 1.f / invInertia.z);
+
+		const PxVec3 localAngVel = globalPose.q.rotateInv(ang);
+		const PxVec3 origMom = localInertia.multiply(localAngVel);
+		const PxVec3 torque = -localAngVel.cross(origMom);
+		PxVec3 newMom = origMom + torque * dt;
+		const PxReal denom = newMom.magnitude();
+		const PxReal ratio = denom > 0.f ? origMom.magnitude() / denom : 0.f;
+		newMom *= ratio;
+		const PxVec3 newDeltaAngVel = globalPose.q.rotate(invInertia.multiply(newMom) - localAngVel);
+
+		ang += newDeltaAngVel;
+	}	
+	
+	if (lockFlags)
+	{
+		if (lockFlags & PxRigidDynamicLockFlag::eLOCK_LINEAR_X)
+			data.linearVelocity.x = 0.f;
+		if (lockFlags & PxRigidDynamicLockFlag::eLOCK_LINEAR_Y)
+			data.linearVelocity.y = 0.f;
+		if (lockFlags & PxRigidDynamicLockFlag::eLOCK_LINEAR_Z)
+			data.linearVelocity.z = 0.f;
+
+		//KS - technically, we can zero the inertia columns and produce stiffer constraints. However, this can cause numerical issues with the 
+		//joint solver, which is fixed by disabling joint preprocessing and setting minResponseThreshold to some reasonable value > 0. However, until
+		//this is handled automatically, it's probably better not to zero these inertia rows
+		if (lockFlags & PxRigidDynamicLockFlag::eLOCK_ANGULAR_X)
+		{
+			ang.x = 0.f;
+			//data.sqrtInvInertia.column0 = PxVec3(0.f);
+		}
+		if (lockFlags & PxRigidDynamicLockFlag::eLOCK_ANGULAR_Y)
+		{
+			ang.y = 0.f;
+			//data.sqrtInvInertia.column1 = PxVec3(0.f);
+		}
+		if (lockFlags & PxRigidDynamicLockFlag::eLOCK_ANGULAR_Z)
+		{
+			ang.z = 0.f;
+			//data.sqrtInvInertia.column2 = PxVec3(0.f);
+		}
+	}
+
+	PX_ASSERT(lin.isFinite());
+	PX_ASSERT(ang.isFinite());
+
+	data.angularVelocity = ang;
+	data.linearVelocity = lin;
+
+	data.invMass = invMass;
+	data.penBiasClamp = maxDepenetrationVelocity;
+	data.maxContactImpulse = maxContactImpulse;
+	data.body2World = globalPose;
+
+	data.reportThreshold = reportThreshold;
+}
--- a/engine/third_party/physx/source/lowleveldynamics/src/DySleep.cpp
+++ b/engine/third_party/physx/source/lowleveldynamics/src/DySleep.cpp
@@ -0,0 +1,236 @@
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+//  * Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//  * Redistributions in binary form must reproduce the above copyright
+//    notice, this list of conditions and the following disclaimer in the
+//    documentation and/or other materials provided with the distribution.
+//  * Neither the name of NVIDIA CORPORATION nor the names of its
+//    contributors may be used to endorse or promote products derived
+//    from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.  
+
+#include "DySleep.h"
+
+using namespace physx;
+
+// PT: TODO: refactor this, parts of the two codepaths are very similar
+
+static PX_FORCE_INLINE PxReal updateWakeCounter(PxsRigidBody* originalBody, PxReal dt, bool enableStabilization, const Cm::SpatialVector& motionVelocity, PxIntBool hasStaticTouch)
+{
+	PxsBodyCore& bodyCore = originalBody->getCore();
+
+	// update the body's sleep state and 
+	const PxReal wakeCounterResetTime = 20.0f*0.02f;
+
+	PxReal wc = bodyCore.wakeCounter;
+
+	if (enableStabilization)
+	{
+		const PxTransform& body2World = bodyCore.body2World;
+
+		// calculate normalized energy: kinetic energy divided by mass
+
+		const PxVec3& t = bodyCore.inverseInertia;
+		const PxVec3 inertia(	t.x > 0.0f ? 1.0f / t.x : 1.0f,
+								t.y > 0.0f ? 1.0f / t.y : 1.0f,
+								t.z > 0.0f ? 1.0f / t.z : 1.0f);
+
+		const PxVec3& sleepLinVelAcc = motionVelocity.linear;
+		const PxVec3 sleepAngVelAcc = body2World.q.rotateInv(motionVelocity.angular);
+
+		// scale threshold by cluster factor (more contacts => higher sleep threshold)
+		//const PxReal clusterFactor = PxReal(1u + getNumUniqueInteractions());
+
+		PxReal invMass = bodyCore.inverseMass;
+		if (invMass == 0.0f)
+			invMass = 1.0f;
+
+		const PxReal angular = sleepAngVelAcc.multiply(sleepAngVelAcc).dot(inertia) * invMass;
+		const PxReal linear = sleepLinVelAcc.magnitudeSquared();
+		const PxReal frameNormalizedEnergy = 0.5f * (angular + linear);
+
+		const PxReal cf = hasStaticTouch ? PxReal(PxMin(10u, bodyCore.numCountedInteractions)) : 0.0f;
+		const PxReal freezeThresh = cf*bodyCore.freezeThreshold;
+
+		originalBody->mFreezeCount = PxMax(originalBody->mFreezeCount - dt, 0.0f);
+		bool settled = true;
+
+		PxReal accelScale = PxMin(1.0f, originalBody->mAccelScale + dt);
+
+		if (frameNormalizedEnergy >= freezeThresh)
+		{
+			settled = false;
+			originalBody->mFreezeCount = PXD_FREEZE_INTERVAL;
+		}
+
+		if (!hasStaticTouch)
+		{
+			accelScale = 1.0f;
+			settled = false;
+		}
+
+		bool freeze = false;
+		if (settled)
+		{
+			//Dampen bodies that are just about to go to sleep
+			if (cf > 1.0f)
+			{
+				const PxReal sleepDamping = PXD_SLEEP_DAMPING;
+				const PxReal sleepDampingTimesDT = sleepDamping*dt;
+				const PxReal d = 1.0f - sleepDampingTimesDT;
+				bodyCore.linearVelocity = bodyCore.linearVelocity * d;
+				bodyCore.angularVelocity = bodyCore.angularVelocity * d;
+				accelScale = accelScale * 0.75f + 0.25f*PXD_FREEZE_SCALE;
+			}
+			freeze = originalBody->mFreezeCount == 0.0f && frameNormalizedEnergy < (bodyCore.freezeThreshold * PXD_FREEZE_TOLERANCE);
+		}
+
+		originalBody->mAccelScale = accelScale;
+
+		const PxU32 wasFrozen = originalBody->mInternalFlags & PxsRigidBody::eFROZEN;
+		PxU16 flags;
+		if(freeze)
+		{
+			//current flag isn't frozen but freeze flag raise so we need to raise the frozen flag in this frame
+			flags = PxU16(PxsRigidBody::eFROZEN);
+			if(!wasFrozen)
+				flags |= PxsRigidBody::eFREEZE_THIS_FRAME;
+			bodyCore.body2World = originalBody->getLastCCDTransform();
+		}
+		else
+		{
+			flags = 0;
+			if(wasFrozen)
+				flags |= PxsRigidBody::eUNFREEZE_THIS_FRAME;
+		}
+		originalBody->mInternalFlags = flags;
+
+		/*KS: New algorithm for sleeping when using stabilization:
+		* Energy *this frame* must be higher than sleep threshold and accumulated energy over previous frames
+		* must be higher than clusterFactor*energyThreshold.
+		*/
+		if (wc < wakeCounterResetTime * 0.5f || wc < dt)
+		{
+			//Accumulate energy
+			originalBody->mSleepLinVelAcc += sleepLinVelAcc;
+			originalBody->mSleepAngVelAcc += sleepAngVelAcc;
+
+			//If energy this frame is high
+			if (frameNormalizedEnergy >= bodyCore.sleepThreshold)
+			{
+				//Compute energy over sleep preparation time
+				const PxReal sleepAngular = originalBody->mSleepAngVelAcc.multiply(originalBody->mSleepAngVelAcc).dot(inertia) * invMass;
+				const PxReal sleepLinear = originalBody->mSleepLinVelAcc.magnitudeSquared();
+				const PxReal normalizedEnergy = 0.5f * (sleepAngular + sleepLinear);
+				const PxReal sleepClusterFactor = PxReal(1u + bodyCore.numCountedInteractions);
+				// scale threshold by cluster factor (more contacts => higher sleep threshold)
+				const PxReal threshold = sleepClusterFactor*bodyCore.sleepThreshold;
+
+				//If energy over sleep preparation time is high
+				if (normalizedEnergy >= threshold)
+				{
+					//Wake up
+					//PX_ASSERT(isActive());
+					originalBody->resetSleepFilter();
+
+					const float factor = bodyCore.sleepThreshold == 0.0f ? 2.0f : PxMin(normalizedEnergy / threshold, 2.0f);
+					PxReal oldWc = wc;
+					wc = factor * 0.5f * wakeCounterResetTime + dt * (sleepClusterFactor - 1.0f);
+					bodyCore.solverWakeCounter = wc;
+					//if (oldWc == 0.0f)  // for the case where a sleeping body got activated by the system (not the user) AND got processed by the solver as well
+					//	notifyNotReadyForSleeping(bodyCore.nodeIndex);
+
+					if (oldWc == 0.0f)
+						originalBody->mInternalFlags |= PxsRigidBody::eACTIVATE_THIS_FRAME;
+
+					return wc;
+				}
+			}
+		}
+	}
+	else 
+	{
+		if (wc < wakeCounterResetTime * 0.5f || wc < dt)
+		{
+			const PxTransform& body2World = bodyCore.body2World;
+
+			// calculate normalized energy: kinetic energy divided by mass
+			const PxVec3& t = bodyCore.inverseInertia;
+			const PxVec3 inertia(	t.x > 0.0f ? 1.0f / t.x : 1.0f,
+									t.y > 0.0f ? 1.0f / t.y : 1.0f,
+									t.z > 0.0f ? 1.0f / t.z : 1.0f);
+
+			const PxVec3& sleepLinVelAcc = motionVelocity.linear;
+			const PxVec3 sleepAngVelAcc = body2World.q.rotateInv(motionVelocity.angular);
+
+			originalBody->mSleepLinVelAcc += sleepLinVelAcc;
+			originalBody->mSleepAngVelAcc += sleepAngVelAcc;
+
+			PxReal invMass = bodyCore.inverseMass;
+			if (invMass == 0.0f)
+				invMass = 1.0f;
+
+			const PxReal angular = originalBody->mSleepAngVelAcc.multiply(originalBody->mSleepAngVelAcc).dot(inertia) * invMass;
+			const PxReal linear = originalBody->mSleepLinVelAcc.magnitudeSquared();
+			const PxReal normalizedEnergy = 0.5f * (angular + linear);
+
+			// scale threshold by cluster factor (more contacts => higher sleep threshold)
+			const PxReal clusterFactor = PxReal(1 + bodyCore.numCountedInteractions);
+			const PxReal threshold = clusterFactor*bodyCore.sleepThreshold;
+
+			if (normalizedEnergy >= threshold)
+			{
+				//PX_ASSERT(isActive());
+				originalBody->resetSleepFilter();
+
+				const float factor = threshold == 0.0f ? 2.0f : PxMin(normalizedEnergy / threshold, 2.0f);
+				PxReal oldWc = wc;
+				wc = factor * 0.5f * wakeCounterResetTime + dt * (clusterFactor - 1.0f);
+				bodyCore.solverWakeCounter = wc;
+				PxU16 flags = 0;
+				if (oldWc == 0.0f)  // for the case where a sleeping body got activated by the system (not the user) AND got processed by the solver as well
+				{
+					flags |= PxsRigidBody::eACTIVATE_THIS_FRAME;
+					//notifyNotReadyForSleeping(bodyCore.nodeIndex);
+				}
+
+				originalBody->mInternalFlags = flags;
+
+				return wc;
+			}
+		}
+	}
+
+	wc = PxMax(wc - dt, 0.0f);
+	bodyCore.solverWakeCounter = wc;
+	return wc;
+}
+
+void Dy::sleepCheck(PxsRigidBody* originalBody, PxReal dt, bool enableStabilization, const Cm::SpatialVector& motionVelocity, PxIntBool hasStaticTouch)
+{
+	const PxReal wc = updateWakeCounter(originalBody, dt, enableStabilization, motionVelocity, hasStaticTouch);
+	if(wc == 0.0f)
+	{
+		//PxsBodyCore& bodyCore = originalBody->getCore();
+		originalBody->mInternalFlags |= PxsRigidBody::eDEACTIVATE_THIS_FRAME;
+		//	notifyReadyForSleeping(bodyCore.nodeIndex);
+		originalBody->resetSleepFilter();
+	}
+}
--- a/engine/third_party/physx/source/lowleveldynamics/src/DySleep.h
+++ b/engine/third_party/physx/source/lowleveldynamics/src/DySleep.h
@@ -0,0 +1,44 @@
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+//  * Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//  * Redistributions in binary form must reproduce the above copyright
+//    notice, this list of conditions and the following disclaimer in the
+//    documentation and/or other materials provided with the distribution.
+//  * Neither the name of NVIDIA CORPORATION nor the names of its
+//    contributors may be used to endorse or promote products derived
+//    from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.  
+
+#ifndef DY_SLEEP_H
+#define DY_SLEEP_H
+
+#include "PxvDynamics.h"
+#include "PxsRigidBody.h"
+#include "DySleepingConfigulation.h"
+
+namespace physx
+{
+namespace Dy
+{
+	void sleepCheck(PxsRigidBody* originalBody, PxReal dt, bool enableStabilization, const Cm::SpatialVector& motionVelocity, PxIntBool hasStaticTouch);
+}
+}
+
+#endif
--- a/engine/third_party/physx/source/lowleveldynamics/src/DySolverBody.h
+++ b/engine/third_party/physx/source/lowleveldynamics/src/DySolverBody.h
@@ -0,0 +1,68 @@
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+//  * Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//  * Redistributions in binary form must reproduce the above copyright
+//    notice, this list of conditions and the following disclaimer in the
+//    documentation and/or other materials provided with the distribution.
+//  * Neither the name of NVIDIA CORPORATION nor the names of its
+//    contributors may be used to endorse or promote products derived
+//    from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
+
+#ifndef DY_SOLVER_BODY_H
+#define DY_SOLVER_BODY_H
+
+#include "foundation/PxVec3.h"
+#include "foundation/PxTransform.h"
+#include "foundation/PxMat33.h"
+#include "CmSpatialVector.h"
+#include "solver/PxSolverDefs.h"
+
+namespace physx
+{
+
+class PxsRigidBody;
+struct PxsBodyCore;
+
+namespace Dy
+{
+
+// PT: TODO: make sure this is still needed / replace with V4sqrt
+//This method returns values of 0 when the inertia is 0. This is a bit of a hack but allows us to 
+//represent kinematic objects' velocities in our new format
+PX_FORCE_INLINE PxVec3 computeSafeSqrtInertia(const PxVec3& v)
+{
+	return PxVec3(	v.x == 0.0f ? 0.0f : PxSqrt(v.x),
+					v.y == 0.0f ? 0.0f : PxSqrt(v.y),
+					v.z == 0.0f ? 0.0f : PxSqrt(v.z));
+}
+
+void copyToSolverBodyData(const PxVec3& linearVelocity, const PxVec3& angularVelocity, PxReal invMass, const PxVec3& invInertia, const PxTransform& globalPose,
+	PxReal maxDepenetrationVelocity, PxReal maxContactImpulse, PxU32 nodeIndex, PxReal reportThreshold, PxSolverBodyData& solverBodyData, PxU32 lockFlags,
+	PxReal dt, bool gyroscopicForces);
+
+// PT: TODO: using PxsBodyCore in the interface makes us write less data to the stack for passing arguments, and we can take advantage of the class layout
+// (we know what is aligned or not, we know if it is safe to V4Load vectors, etc). Note that this is what we previously had, which is why PxsBodyCore was still
+// forward-referenced above.
+//void copyToSolverBodyData(PxSolverBodyData& solverBodyData, const PxsBodyCore& core, const PxU32 nodeIndex);
+
+}
+
+}
+
+#endif
--- a/engine/third_party/physx/source/lowleveldynamics/src/DySolverConstraint1D.h
+++ b/engine/third_party/physx/source/lowleveldynamics/src/DySolverConstraint1D.h
@@ -0,0 +1,199 @@
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+//  * Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//  * Redistributions in binary form must reproduce the above copyright
+//    notice, this list of conditions and the following disclaimer in the
+//    documentation and/or other materials provided with the distribution.
+//  * Neither the name of NVIDIA CORPORATION nor the names of its
+//    contributors may be used to endorse or promote products derived
+//    from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.  
+
+#ifndef DY_SOLVER_CONSTRAINT_1D_H
+#define DY_SOLVER_CONSTRAINT_1D_H
+
+#include "foundation/PxVec3.h"
+#include "PxvConfig.h"
+#include "DySolverConstraintTypes.h"
+#include "DySolverBody.h"
+#include "PxConstraintDesc.h"
+#include "DySolverConstraintDesc.h"
+#include "DyCpuGpu1dConstraint.h"
+
+namespace physx
+{
+
+namespace Dy
+{
+
+// dsequeira: we should probably fork these structures for constraints and extended constraints,
+// since there's a few things that are used for one but not the other
+
+struct SolverConstraint1DHeader
+{
+	PxU8	type;			// enum SolverConstraintType - must be first byte
+	PxU8	count;			// count of following 1D constraints
+	PxU8	dominance;
+	PxU8	breakable;		// indicate whether this constraint is breakable or not						
+
+	PxReal	linBreakImpulse;
+	PxReal	angBreakImpulse;
+	PxReal	invMass0D0;
+	PxVec3	body0WorldOffset;
+	PxReal	invMass1D1;
+	PxReal	linearInvMassScale0;		// only used by articulations
+	PxReal	angularInvMassScale0;		// only used by articulations
+	PxReal	linearInvMassScale1;		// only used by articulations
+	PxReal	angularInvMassScale1;		// only used by articulations
+};
+
+PX_COMPILE_TIME_ASSERT(sizeof(SolverConstraint1DHeader) == 48);
+
+PX_ALIGN_PREFIX(16)
+struct SolverConstraint1D 
+{
+public:
+	PxVec3		lin0;					//!< linear velocity projection (body 0)	
+	PxReal		constant;				//!< constraint constant term
+
+	PxVec3		lin1;					//!< linear velocity projection (body 1)
+	PxReal		unbiasedConstant;		//!< constraint constant term without bias
+
+	PxVec3		ang0;					//!< angular velocity projection (body 0)
+	PxReal		velMultiplier;			//!< constraint velocity multiplier
+
+	PxVec3		ang1;					//!< angular velocity projection (body 1)
+	PxReal		impulseMultiplier;		//!< constraint impulse multiplier
+
+	PxVec3		ang0Writeback;			//!< unscaled angular velocity projection (body 0)
+	PxReal		residualVelIter;
+
+	PxReal		minImpulse;				//!< Lower bound on impulse magnitude	 
+	PxReal		maxImpulse;				//!< Upper bound on impulse magnitude
+	PxReal		appliedForce;			//!< applied force to correct velocity+bias
+private:
+	union 
+	{		
+		PxU32		flags; //Use only the most significant bit (which corresponds to the float's sign bit)
+		PxReal		residualPosIter;
+	};
+public:	
+
+	void setSolverConstants(const Constraint1dSolverConstantsPGS& solverConstants)
+	{
+		constant = solverConstants.constant;
+		unbiasedConstant = solverConstants.unbiasedConstant;
+		velMultiplier = solverConstants.velMultiplier;
+		impulseMultiplier = solverConstants.impulseMultiplier;
+	}
+
+	PX_FORCE_INLINE PxU32 setBit(PxU32 value, PxU32 bitLocation, bool bitState)
+	{
+		if (bitState)
+			return value | (1 << bitLocation);
+		else
+			return value & (~(1 << bitLocation));
+	}
+
+	PX_FORCE_INLINE void setOutputForceFlag(bool outputForce)
+	{
+		flags = setBit(flags, 31, outputForce);
+	}
+	
+	PX_FORCE_INLINE bool getOutputForceFlag() const
+	{
+		return flags & 0x80000000;
+	}
+
+	PX_FORCE_INLINE void setPositionIterationResidual(PxReal residual)
+	{
+		bool flag = getOutputForceFlag();
+		residualPosIter = residual;
+		setOutputForceFlag(flag);
+	}
+	
+	PX_FORCE_INLINE PxReal getPositionIterationResidual() const
+	{
+		return PxAbs(residualPosIter);
+	}
+
+	PX_FORCE_INLINE void setVelocityIterationResidual(PxReal r)
+	{
+		residualVelIter = r;
+	}
+
+} PX_ALIGN_SUFFIX(16); 	
+
+PX_COMPILE_TIME_ASSERT(sizeof(SolverConstraint1D) == 96);
+
+
+struct SolverConstraint1DExt : public SolverConstraint1D
+{
+public:
+	Cm::SpatialVectorV deltaVA;
+	Cm::SpatialVectorV deltaVB;
+};
+
+//PX_COMPILE_TIME_ASSERT(sizeof(SolverConstraint1DExt) == 160);
+
+
+PX_FORCE_INLINE void init(SolverConstraint1DHeader& h, 
+						  PxU8 count, 
+						  bool isExtended,
+						  const PxConstraintInvMassScale& ims)
+{
+	h.type			= PxU8(isExtended ? DY_SC_TYPE_EXT_1D : DY_SC_TYPE_RB_1D);
+	h.count			= count;
+	h.dominance		= 0;
+	h.linearInvMassScale0	= ims.linear0;
+	h.angularInvMassScale0	= ims.angular0;
+	h.linearInvMassScale1	= -ims.linear1;
+	h.angularInvMassScale1	= -ims.angular1;
+}
+
+PX_FORCE_INLINE void init(SolverConstraint1D& c,
+						  const PxVec3& _linear0, const PxVec3& _linear1, 
+						  const PxVec3& _angular0, const PxVec3& _angular1,
+						  PxReal _minImpulse, PxReal _maxImpulse)
+{
+	PX_ASSERT(_linear0.isFinite());
+	PX_ASSERT(_linear1.isFinite());
+	c.lin0					= _linear0;
+	c.lin1					= _linear1;
+	c.ang0					= _angular0;
+	c.ang1					= _angular1;
+	c.minImpulse			= _minImpulse;
+	c.maxImpulse			= _maxImpulse;
+	c.setOutputForceFlag(false);
+	c.appliedForce			= 0;
+	c.residualVelIter		= 0;
+	c.setPositionIterationResidual(0.0f);
+}
+
+PX_FORCE_INLINE bool needsNormalVel(const Px1DConstraint &c)
+{
+	return c.flags & Px1DConstraintFlag::eRESTITUTION
+		|| (c.flags & Px1DConstraintFlag::eSPRING && c.flags & Px1DConstraintFlag::eACCELERATION_SPRING);
+}
+
+}
+}
+
+#endif
--- a/engine/third_party/physx/source/lowleveldynamics/src/DySolverConstraint1D4.h
+++ b/engine/third_party/physx/source/lowleveldynamics/src/DySolverConstraint1D4.h
@@ -0,0 +1,116 @@
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+//  * Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//  * Redistributions in binary form must reproduce the above copyright
+//    notice, this list of conditions and the following disclaimer in the
+//    documentation and/or other materials provided with the distribution.
+//  * Neither the name of NVIDIA CORPORATION nor the names of its
+//    contributors may be used to endorse or promote products derived
+//    from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.  
+
+#ifndef DY_SOLVER_CONSTRAINT_1D4_H
+#define DY_SOLVER_CONSTRAINT_1D4_H
+
+#include "foundation/PxVec3.h"
+#include "PxvConfig.h"
+#include "DySolverConstraint1D.h"
+
+namespace physx
+{
+
+namespace Dy
+{
+
+struct SolverConstraint1DHeader4
+{
+	PxU8	type;			// enum SolverConstraintType - must be first byte
+	PxU8	pad0[3];	
+	//These counts are the max of the 4 sets of data.
+	//When certain pairs have fewer constraints than others, they are padded with 0s so that no work is performed but 
+	//calculations are still shared (afterall, they're computationally free because we're doing 4 things at a time in SIMD)
+	PxU32	count;
+	PxU8	count0, count1, count2, count3;
+	PxU8	break0, break1, break2, break3;
+
+	aos::Vec4V	linBreakImpulse;
+	aos::Vec4V	angBreakImpulse;
+	aos::Vec4V	invMass0D0;
+	aos::Vec4V	invMass1D1;
+	aos::Vec4V	angD0;
+	aos::Vec4V	angD1;
+
+	aos::Vec4V	body0WorkOffsetX;
+	aos::Vec4V	body0WorkOffsetY;
+	aos::Vec4V	body0WorkOffsetZ;
+};
+
+struct SolverConstraint1DBase4 
+{
+public:
+	aos::Vec4V		lin0X;
+	aos::Vec4V		lin0Y;
+	aos::Vec4V		lin0Z;
+	aos::Vec4V		ang0X;
+	aos::Vec4V		ang0Y;
+	aos::Vec4V		ang0Z;
+	aos::Vec4V		ang0WritebackX;
+	aos::Vec4V		ang0WritebackY;
+	aos::Vec4V		ang0WritebackZ;
+	aos::Vec4V		constant;
+	aos::Vec4V		unbiasedConstant;
+	aos::Vec4V		velMultiplier;
+	aos::Vec4V		impulseMultiplier;
+	aos::Vec4V		minImpulse;
+	aos::Vec4V		maxImpulse;
+	aos::Vec4V		appliedForce;
+	PxU32		flags[4];
+};
+
+struct SolverConstraint1DBase4WithResidual : public SolverConstraint1DBase4
+{
+	aos::Vec4V		residualVelIter;
+	aos::Vec4V		residualPosIter;
+};
+
+PX_COMPILE_TIME_ASSERT(sizeof(SolverConstraint1DBase4) == 272);
+
+struct SolverConstraint1DDynamic4 : public SolverConstraint1DBase4
+{
+	aos::Vec4V		lin1X;
+	aos::Vec4V		lin1Y;
+	aos::Vec4V		lin1Z;
+	aos::Vec4V		ang1X;
+	aos::Vec4V		ang1Y;
+	aos::Vec4V		ang1Z;
+};
+PX_COMPILE_TIME_ASSERT(sizeof(SolverConstraint1DDynamic4) == 368);
+
+struct SolverConstraint1DDynamic4WithResidual : public SolverConstraint1DDynamic4
+{
+	aos::Vec4V		residualVelIter;
+	aos::Vec4V		residualPosIter;
+};
+
+}
+
+}
+
+#endif
--- a/engine/third_party/physx/source/lowleveldynamics/src/DySolverConstraint1DStep.h
+++ b/engine/third_party/physx/source/lowleveldynamics/src/DySolverConstraint1DStep.h
@@ -0,0 +1,282 @@
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+//  * Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//  * Redistributions in binary form must reproduce the above copyright
+//    notice, this list of conditions and the following disclaimer in the
+//    documentation and/or other materials provided with the distribution.
+//  * Neither the name of NVIDIA CORPORATION nor the names of its
+//    contributors may be used to endorse or promote products derived
+//    from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.  
+
+#ifndef DY_SOLVER_CONSTRAINT_1D_STEP_H
+#define DY_SOLVER_CONSTRAINT_1D_STEP_H
+
+#include "CmSpatialVector.h"
+#include "foundation/PxVec3.h"
+#include "DySolverConstraintTypes.h"
+#include "PxConstraintDesc.h"
+#include "DyCpuGpu1dConstraint.h"
+
+
+namespace physx
+{
+	namespace Sc
+	{
+		class ShapeInteraction;
+	}
+	namespace Dy
+	{
+		struct SolverContactHeaderStep
+		{
+			enum DySolverContactFlags
+			{
+				eHAS_FORCE_THRESHOLDS = 0x1
+			};
+
+			PxU8	type;					//Note: mType should be first as the solver expects a type in the first byte.
+			PxU8	flags;
+			PxU8	numNormalConstr;
+			PxU8	numFrictionConstr;					//4
+
+			PxReal	angDom0;							//8
+			PxReal	angDom1;							//12
+			PxReal	invMass0;							//16
+
+			aos::Vec4V   staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W;		//32
+			PxVec3	normal;															//48
+			
+			PxReal	maxPenBias;														//52
+			PxReal	invMass1;														//56
+			PxReal	minNormalForce;													//60
+			PxU32 broken;															//64
+			PxU8* frictionBrokenWritebackByte;										//68	72
+			Sc::ShapeInteraction* shapeInteraction;									//72	80
+#if !PX_P64_FAMILY
+			PxU32 pad[2];															//80
+#endif		
+
+			PX_FORCE_INLINE aos::FloatV getStaticFriction() const { return aos::V4GetX(staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W); }
+			PX_FORCE_INLINE aos::FloatV getDynamicFriction() const { return aos::V4GetY(staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W); }
+			PX_FORCE_INLINE aos::FloatV getDominance0() const { return aos::V4GetZ(staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W); }
+			PX_FORCE_INLINE aos::FloatV getDominance1() const { return aos::V4GetW(staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W); }
+		};
+
+		struct SolverContactPointStep
+		{
+			PxVec3 raXnI;
+			PxF32 separation;
+			PxVec3 rbXnI;
+			PxF32 velMultiplier;
+			PxF32 targetVelocity;
+			PxF32 biasCoefficient;
+			PxF32 recipResponse;
+			PxF32 maxImpulse;
+		};
+
+		struct SolverContactPointStepExt : public SolverContactPointStep
+		{
+			aos::Vec3V linDeltaVA;
+			aos::Vec3V linDeltaVB;
+			aos::Vec3V angDeltaVA;
+			aos::Vec3V angDeltaVB;
+		};
+
+		struct SolverContactFrictionStep
+		{
+			aos::Vec4V normalXYZ_ErrorW;
+			aos::Vec4V raXnI_targetVelW;
+			aos::Vec4V rbXnI_velMultiplierW;
+			PxReal biasScale;
+			PxReal appliedForce;
+			PxReal frictionScale;
+			PxU32 pad[1];
+
+			PX_FORCE_INLINE void setAppliedForce(const aos::FloatV f) { aos::FStore(f, &appliedForce); }
+
+			PX_FORCE_INLINE aos::Vec3V getNormal() const { return aos::Vec3V_From_Vec4V(normalXYZ_ErrorW); }
+			PX_FORCE_INLINE aos::FloatV getAppliedForce() const { return aos::FLoad(appliedForce); }
+		};
+		PX_COMPILE_TIME_ASSERT(sizeof(SolverContactFrictionStep) % 16 == 0);
+
+		struct SolverContactFrictionStepExt : public SolverContactFrictionStep
+		{
+			aos::Vec3V linDeltaVA;
+			aos::Vec3V linDeltaVB;
+			aos::Vec3V angDeltaVA;
+			aos::Vec3V angDeltaVB;
+		};
+
+		struct SolverConstraint1DHeaderStep
+		{
+			PxU8	type;			// enum SolverConstraintType - must be first byte
+			PxU8	count;			// count of following 1D constraints
+			PxU8	dominance;
+			PxU8	breakable;		// indicate whether this constraint is breakable or not						
+			PxReal	linBreakImpulse;
+			PxReal	angBreakImpulse;
+			PxReal	invMass0D0;
+
+			PxVec3	body0WorldOffset;
+			PxReal	invMass1D1;
+
+			PxVec3	rAWorld;
+			PxReal linearInvMassScale0;		// only used by articulations
+
+			PxVec3	rBWorld;
+			PxReal	angularInvMassScale0;
+
+			PxReal	linearInvMassScale1;		// only used by articulations
+			PxReal	angularInvMassScale1;
+			PxU32	pad[2];
+
+			//Ortho axes for body 0, recipResponse in W component
+			PxVec4    angOrthoAxis0_recipResponseW[3];
+			//Ortho axes for body 1, error of body in W component
+			PxVec4    angOrthoAxis1_Error[3];
+		};
+		PX_COMPILE_TIME_ASSERT(PX_OFFSET_OF(SolverConstraint1DHeaderStep, angOrthoAxis0_recipResponseW) % 16 == 0);
+
+
+		PX_FORCE_INLINE void init(SolverConstraint1DHeaderStep& h,
+			PxU8 count,
+			bool isExtended,
+			const PxConstraintInvMassScale& ims)
+		{
+			h.type = PxU8(isExtended ? DY_SC_TYPE_EXT_1D : DY_SC_TYPE_RB_1D);
+			h.count = count;
+			h.dominance = 0;
+			h.linearInvMassScale0 = ims.linear0;
+			h.angularInvMassScale0 = ims.angular0;
+			h.linearInvMassScale1 = -ims.linear1;
+			h.angularInvMassScale1 = -ims.angular1;
+		}
+
+
+		PX_ALIGN_PREFIX(16)
+		struct SolverConstraint1DStep
+		{
+		public:
+			PxVec3		lin0;					//!< linear velocity projection (body 0)	
+			PxReal		error;					//!< constraint error term - must be scaled by biasScale. Can be adjusted at run-time
+
+			PxVec3		lin1;					//!< linear velocity projection (body 1)
+			PxReal		biasScale;				//!< constraint constant bias scale. Constant
+
+			PxVec3		ang0;					//!< angular velocity projection (body 0)
+			PxReal		velMultiplier;			//!< constraint velocity multiplier
+
+			PxVec3		ang1;					//!< angular velocity projection (body 1)
+			PxReal		velTarget;				//!< Scaled target velocity of the constraint drive
+
+			PxReal		minImpulse;				//!< Lower bound on impulse magnitude	 
+			PxReal		maxImpulse;				//!< Upper bound on impulse magnitude
+			PxReal		appliedForce;			//!< applied force to correct velocity+bias
+			PxReal		maxBias;
+
+			PxU32		flags;
+			PxReal		recipResponse;			//Constant. Only used for articulations;
+			//PxReal		angularErrorScale;		//Constant
+			PxReal		residualVelIter;
+		private:
+			union
+			{
+				PxU32		useAngularError; //Use only the most significant bit (which corresponds to the float's sign bit)
+				PxReal		residualPosIter;
+			};
+		public:
+
+			void setSolverConstants(const Constraint1dSolverConstantsTGS& desc)
+			{
+				biasScale = desc.biasScale;
+				error = desc.error;
+				velTarget = desc.targetVel;
+				velMultiplier = desc.velMultiplier;
+			}
+
+
+			PX_FORCE_INLINE PxU32 setBit(PxU32 value, PxU32 bitLocation, bool bitState)
+			{
+				if (bitState)
+					return value | (1 << bitLocation);
+				else
+					return value & (~(1 << bitLocation));
+			}
+
+			PX_FORCE_INLINE void setUseAngularError(bool b)
+			{
+				useAngularError = setBit(useAngularError, 31, b);
+			}
+
+			PX_FORCE_INLINE PxReal getUseAngularError() const
+			{
+				return (useAngularError & 0x80000000) ? 1.0f : 0.0f;
+			}
+
+			PX_FORCE_INLINE void setPositionIterationResidual(PxReal residual)
+			{
+				bool b = getUseAngularError();
+				residualPosIter = residual;
+				setUseAngularError(b);
+			}
+
+			PX_FORCE_INLINE PxReal getPositionIterationResidual() const
+			{
+				return PxAbs(residualPosIter);
+			}
+
+			PX_FORCE_INLINE void setVelocityIterationResidual(PxReal residual)
+			{
+				residualVelIter = residual;
+			}
+
+		} PX_ALIGN_SUFFIX(16);
+
+		struct SolverConstraint1DExtStep : public SolverConstraint1DStep
+		{
+		public:
+			Cm::SpatialVectorV deltaVA;
+			Cm::SpatialVectorV deltaVB;
+		};
+
+		PX_FORCE_INLINE void init(SolverConstraint1DStep& c,
+			const PxVec3& _linear0, const PxVec3& _linear1,
+			const PxVec3& _angular0, const PxVec3& _angular1,
+			PxReal _minImpulse, PxReal _maxImpulse)
+		{
+			PX_ASSERT(_linear0.isFinite());
+			PX_ASSERT(_linear1.isFinite());
+			c.lin0 = _linear0;
+			c.lin1 = _linear1;
+			c.ang0 = _angular0;
+			c.ang1 = _angular1;
+			c.minImpulse = _minImpulse;
+			c.maxImpulse = _maxImpulse;
+			c.flags = 0;
+			c.appliedForce = 0;
+			c.setUseAngularError(true);
+			c.residualVelIter = 0.0f;
+			c.setPositionIterationResidual(0.0f);
+		}
+
+	}//namespace Dy
+}
+
+#endif
--- a/engine/third_party/physx/source/lowleveldynamics/src/DySolverConstraintDesc.h
+++ b/engine/third_party/physx/source/lowleveldynamics/src/DySolverConstraintDesc.h
@@ -0,0 +1,120 @@
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+//  * Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//  * Redistributions in binary form must reproduce the above copyright
+//    notice, this list of conditions and the following disclaimer in the
+//    documentation and/or other materials provided with the distribution.
+//  * Neither the name of NVIDIA CORPORATION nor the names of its
+//    contributors may be used to endorse or promote products derived
+//    from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.  
+
+#ifndef DY_SOLVER_CONSTRAINT_DESC_H
+#define DY_SOLVER_CONSTRAINT_DESC_H
+
+#include "PxvConfig.h"
+#include "DySolverConstraintTypes.h"
+#include "foundation/PxUtilities.h"
+#include "PxConstraintDesc.h"
+#include "solver/PxSolverDefs.h"
+
+#define PGS_SUPPORT_COMPOUND_CONSTRAINTS	0
+
+namespace physx
+{
+struct PxcNpWorkUnit;
+struct PxsContactManagerOutput;
+
+namespace Dy
+{
+class FeatherstoneArticulation;
+
+// dsequeira: moved this articulation stuff here to sever a build dep on Articulation.h through DyThreadContext.h and onward
+
+#if PGS_SUPPORT_COMPOUND_CONSTRAINTS
+//This class rolls together multiple contact managers into a single contact manager.
+struct CompoundContactManager
+{
+	PxU32 mStartIndex;
+	PxU16 mStride;
+	PxU16 mReducedContactCount;
+	PxU16 originalContactCount;
+	PxU8 originalPatchCount;
+	PxU8 originalStatusFlags;
+
+	PxcNpWorkUnit* unit;			//This is a work unit but the contact buffer has been adjusted to contain all the contacts for all the subsequent pairs
+	PxsContactManagerOutput* cmOutput;
+	PxU8* originalContactPatches;	//This is the original contact buffer that we replaced with a combined buffer	
+	PxU8* originalContactPoints;
+	
+	PxReal* originalForceBuffer;	//This is the original force buffer that we replaced with a combined force buffer
+	PxU16* forceBufferList;			//This is a list of indices from the reduced force buffer to the original force buffers - we need this to fix up the write-backs from the solver	
+
+	PxU8* originalFrictionPatches;	//This is the original friction patches buffer that we replaced with a combined buffer	
+};
+#endif
+
+struct SolverConstraintPrepState
+{
+enum Enum 
+{
+	eOUT_OF_MEMORY,
+	eUNBATCHABLE,
+	eSUCCESS
+};
+};
+
+PX_FORCE_INLINE bool	isArticulationConstraint(const PxSolverConstraintDesc& desc)
+{
+	return desc.linkIndexA != PxSolverConstraintDesc::RIGID_BODY
+		|| desc.linkIndexB != PxSolverConstraintDesc::RIGID_BODY;
+}
+
+PX_FORCE_INLINE void setConstraintLength(PxSolverConstraintDesc& desc, const PxU32 constraintLength)
+{
+	PX_ASSERT(0==(constraintLength & 0x0f));
+	PX_ASSERT(constraintLength <= PX_MAX_U16 * 16);
+	desc.constraintLengthOver16 = PxTo16(constraintLength >> 4);
+}
+
+PX_FORCE_INLINE PxU32 getConstraintLength(const PxSolverConstraintDesc& desc)
+{
+	return PxU32(desc.constraintLengthOver16 << 4);
+}
+
+PX_FORCE_INLINE Dy::FeatherstoneArticulation* getArticulationA(const PxSolverConstraintDesc& desc)
+{
+	return reinterpret_cast<Dy::FeatherstoneArticulation*>(desc.articulationA);
+}
+
+PX_FORCE_INLINE Dy::FeatherstoneArticulation* getArticulationB(const PxSolverConstraintDesc& desc)
+{
+	return reinterpret_cast<Dy::FeatherstoneArticulation*>(desc.articulationB);
+}
+
+PX_COMPILE_TIME_ASSERT(0 == (0x0f & sizeof(PxSolverConstraintDesc)));
+
+#define MAX_PERMITTED_SOLVER_PROGRESS 0xFFFF
+
+}
+
+}
+
+#endif
--- a/engine/third_party/physx/source/lowleveldynamics/src/DySolverConstraintExtShared.h
+++ b/engine/third_party/physx/source/lowleveldynamics/src/DySolverConstraintExtShared.h
@@ -0,0 +1,54 @@
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+//  * Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//  * Redistributions in binary form must reproduce the above copyright
+//    notice, this list of conditions and the following disclaimer in the
+//    documentation and/or other materials provided with the distribution.
+//  * Neither the name of NVIDIA CORPORATION nor the names of its
+//    contributors may be used to endorse or promote products derived
+//    from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.  
+
+#ifndef DY_SOLVER_CONSTRAINT_EXT_SHARED_H
+#define DY_SOLVER_CONSTRAINT_EXT_SHARED_H
+
+#include "foundation/PxPreprocessor.h"
+#include "foundation/PxVecMath.h"
+#include "DyArticulationContactPrep.h"
+#include "DySolverConstraintDesc.h"
+#include "DySolverConstraint1D.h"
+#include "DySolverContact.h"
+#include "PxcNpWorkUnit.h"
+#include "PxsMaterialManager.h"
+
+namespace physx
+{
+	namespace Dy
+	{
+		FloatV setupExtSolverContact(const SolverExtBody& b0, const SolverExtBody& b1,
+			const FloatV& d0, const FloatV& d1, const FloatV& angD0, const FloatV& angD1, const Vec3V& bodyFrame0p, const Vec3V& bodyFrame1p,
+			const Vec3VArg normal, const FloatVArg invDt, const FloatVArg invDtp8, const FloatVArg dt, const FloatVArg restDistance, const FloatVArg maxPenBias, const FloatVArg restitution,
+			const FloatVArg bounceThreshold, const PxContactPoint& contact, SolverContactPointExt& solverContact, const FloatVArg ccdMaxSeparation,
+			Cm::SpatialVectorF* Z, const Cm::SpatialVectorV& v0, const Cm::SpatialVectorV& v1, const FloatV& cfm, const Vec3VArg solverOffsetSlop,
+			const FloatVArg norVel0, const FloatVArg norVel1, const FloatVArg damping, const BoolVArg accelerationSpring);
+	} //namespace Dy
+}
+
+#endif
--- a/engine/third_party/physx/source/lowleveldynamics/src/DySolverConstraintTypes.h
+++ b/engine/third_party/physx/source/lowleveldynamics/src/DySolverConstraintTypes.h
@@ -0,0 +1,73 @@
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+//  * Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//  * Redistributions in binary form must reproduce the above copyright
+//    notice, this list of conditions and the following disclaimer in the
+//    documentation and/or other materials provided with the distribution.
+//  * Neither the name of NVIDIA CORPORATION nor the names of its
+//    contributors may be used to endorse or promote products derived
+//    from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.  
+
+#ifndef DY_SOLVER_CONSTRAINT_TYPES_H
+#define DY_SOLVER_CONSTRAINT_TYPES_H
+
+#include "foundation/PxSimpleTypes.h"
+#include "PxvConfig.h"
+
+namespace physx
+{
+
+enum SolverConstraintType
+{
+	DY_SC_TYPE_NONE = 0,
+
+	DY_SC_TYPE_RB_CONTACT,				// RB-only contact
+	DY_SC_TYPE_RB_1D,					// RB-only 1D constraint
+	DY_SC_TYPE_EXT_CONTACT,				// contact involving articulations
+	DY_SC_TYPE_EXT_1D,					// 1D constraint involving articulations
+	DY_SC_TYPE_STATIC_CONTACT,			// RB-only contact where body b is static
+	DY_SC_TYPE_BLOCK_RB_CONTACT,
+	DY_SC_TYPE_BLOCK_STATIC_RB_CONTACT,
+	DY_SC_TYPE_BLOCK_1D,
+	// PT: the following types are only used in the PGS PF solver
+	DY_SC_TYPE_FRICTION,
+	DY_SC_TYPE_STATIC_FRICTION,
+	DY_SC_TYPE_EXT_FRICTION,
+	DY_SC_TYPE_BLOCK_FRICTION,
+	DY_SC_TYPE_BLOCK_STATIC_FRICTION,
+
+	DY_SC_CONSTRAINT_TYPE_COUNT	//Count of the number of different constraint types in the solver
+};
+
+enum SolverConstraintFlags
+{
+	DY_SC_FLAG_OUTPUT_FORCE			= (1<<1),
+	DY_SC_FLAG_KEEP_BIAS			= (1<<2),
+	DY_SC_FLAG_ROT_EQ				= (1<<3),
+	DY_SC_FLAG_ORTHO_TARGET			= (1<<4),
+	DY_SC_FLAG_SPRING				= (1<<5),
+	DY_SC_FLAG_INEQUALITY			= (1<<6),
+	DY_SC_FLAG_ACCELERATION_SPRING	= (1<<7)
+};
+
+}
+
+#endif
--- a/engine/third_party/physx/source/lowleveldynamics/src/DySolverConstraints.cpp
+++ b/engine/third_party/physx/source/lowleveldynamics/src/DySolverConstraints.cpp
--- a/engine/third_party/physx/source/lowleveldynamics/src/DySolverConstraintsBlock.cpp
+++ b/engine/third_party/physx/source/lowleveldynamics/src/DySolverConstraintsBlock.cpp
--- a/engine/third_party/physx/source/lowleveldynamics/src/DySolverConstraintsShared.h
+++ b/engine/third_party/physx/source/lowleveldynamics/src/DySolverConstraintsShared.h
@@ -0,0 +1,182 @@
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+//  * Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//  * Redistributions in binary form must reproduce the above copyright
+//    notice, this list of conditions and the following disclaimer in the
+//    documentation and/or other materials provided with the distribution.
+//  * Neither the name of NVIDIA CORPORATION nor the names of its
+//    contributors may be used to endorse or promote products derived
+//    from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 
+
+#ifndef DY_SOLVER_CONSTRAINT_SHARED_H
+#define DY_SOLVER_CONSTRAINT_SHARED_H
+
+#include "foundation/PxPreprocessor.h"
+#include "foundation/PxVecMath.h"
+
+#include "DySolverBody.h"
+#include "DySolverContact.h"
+#include "DySolverConstraint1D.h"
+#include "DySolverConstraintDesc.h"
+#include "foundation/PxUtilities.h"
+#include "DyConstraint.h"
+#include "foundation/PxAtomic.h"
+
+
+namespace physx
+{
+
+namespace Dy
+{
+	PX_FORCE_INLINE static FloatV solveDynamicContacts(const SolverContactPoint* PX_RESTRICT contacts, PxU32 nbContactPoints, const Vec3VArg contactNormal,
+	const FloatVArg invMassA, const FloatVArg invMassB, const FloatVArg angDom0, const FloatVArg angDom1, Vec3V& linVel0_, Vec3V& angState0_, 
+	Vec3V& linVel1_, Vec3V& angState1_, PxF32* PX_RESTRICT forceBuffer, Dy::ErrorAccumulator* PX_RESTRICT error)
+{
+	Vec3V linVel0 = linVel0_;
+	Vec3V angState0 = angState0_;
+	Vec3V linVel1 = linVel1_;
+	Vec3V angState1 = angState1_;
+	FloatV accumulatedNormalImpulse = FZero();
+
+	const Vec3V delLinVel0 = V3Scale(contactNormal, invMassA);
+	const Vec3V delLinVel1 = V3Scale(contactNormal, invMassB);
+
+	for(PxU32 i=0;i<nbContactPoints;i++)
+	{
+		const SolverContactPoint& c = contacts[i];
+		PxPrefetchLine(&contacts[i], 128);
+
+		const Vec3V raXn = Vec3V_From_Vec4V(c.raXn_velMultiplierW);
+
+		const Vec3V rbXn = Vec3V_From_Vec4V(c.rbXn_maxImpulseW);
+
+		const FloatV appliedForce = FLoad(forceBuffer[i]);
+		const FloatV velMultiplier = c.getVelMultiplier();
+		const FloatV impulseMultiplier = c.getImpulseMultiplier();
+		
+		/*const FloatV targetVel = c.getTargetVelocity();
+		const FloatV nScaledBias = c.getScaledBias();*/
+		const FloatV maxImpulse = c.getMaxImpulse();
+
+		//Compute the normal velocity of the constraint.
+		const Vec3V v0 = V3MulAdd(linVel0, contactNormal, V3Mul(angState0, raXn));
+		const Vec3V v1 = V3MulAdd(linVel1, contactNormal, V3Mul(angState1, rbXn));
+		const FloatV normalVel = V3SumElems(V3Sub(v0, v1));
+
+		const FloatV biasedErr = c.getBiasedErr();//FScaleAdd(targetVel, velMultiplier, nScaledBias);
+
+		//KS - clamp the maximum force
+		const FloatV _deltaF = FMax(FNegScaleSub(normalVel, velMultiplier, biasedErr), FNeg(appliedForce));
+		const FloatV _newForce = FAdd(FMul(impulseMultiplier, appliedForce), _deltaF);
+		const FloatV newForce = FMin(_newForce, maxImpulse);
+		const FloatV deltaF = FSub(newForce, appliedForce);
+		if (error)
+			error->accumulateErrorLocal(deltaF, velMultiplier);
+
+		linVel0 = V3ScaleAdd(delLinVel0, deltaF, linVel0);
+		linVel1 = V3NegScaleSub(delLinVel1, deltaF, linVel1);
+		angState0 = V3ScaleAdd(raXn, FMul(deltaF, angDom0), angState0);
+		angState1 = V3NegScaleSub(rbXn, FMul(deltaF, angDom1), angState1);
+		
+		FStore(newForce, &forceBuffer[i]);
+
+		accumulatedNormalImpulse = FAdd(accumulatedNormalImpulse, newForce);
+	}
+
+	linVel0_ = linVel0;
+	angState0_ = angState0;
+	linVel1_ = linVel1;
+	angState1_ = angState1;
+	return accumulatedNormalImpulse;
+}
+
+PX_FORCE_INLINE static FloatV solveStaticContacts(const SolverContactPoint* PX_RESTRICT contacts, PxU32 nbContactPoints, const Vec3VArg contactNormal,
+	const FloatVArg invMassA, const FloatVArg angDom0, Vec3V& linVel0_, Vec3V& angState0_, PxF32* PX_RESTRICT forceBuffer, Dy::ErrorAccumulator* PX_RESTRICT globalError)
+{
+	Vec3V linVel0 = linVel0_;
+	Vec3V angState0 = angState0_;
+	FloatV accumulatedNormalImpulse = FZero();
+
+	const Vec3V delLinVel0 = V3Scale(contactNormal, invMassA);
+	
+	Dy::ErrorAccumulator error;
+
+	for(PxU32 i=0;i<nbContactPoints;i++)
+	{
+		const SolverContactPoint& c = contacts[i];
+		PxPrefetchLine(&contacts[i],128);
+
+		const Vec3V raXn = Vec3V_From_Vec4V(c.raXn_velMultiplierW);
+		
+		const FloatV appliedForce = FLoad(forceBuffer[i]);
+		const FloatV velMultiplier = c.getVelMultiplier();
+		const FloatV impulseMultiplier = c.getImpulseMultiplier();
+
+		/*const FloatV targetVel = c.getTargetVelocity();
+		const FloatV nScaledBias = c.getScaledBias();*/
+		const FloatV maxImpulse = c.getMaxImpulse();
+		
+		const Vec3V v0 = V3MulAdd(linVel0, contactNormal, V3Mul(angState0, raXn));
+		const FloatV normalVel = V3SumElems(v0);
+
+
+		const FloatV biasedErr = c.getBiasedErr();//FScaleAdd(targetVel, velMultiplier, nScaledBias);
+
+		// still lots to do here: using loop pipelining we can interweave this code with the
+		// above - the code here has a lot of stalls that we would thereby eliminate
+		const FloatV _deltaF = FMax(FNegScaleSub(normalVel, velMultiplier, biasedErr), FNeg(appliedForce));
+		const FloatV _newForce = FAdd(FMul(appliedForce, impulseMultiplier), _deltaF);
+		const FloatV newForce = FMin(_newForce, maxImpulse);
+		const FloatV deltaF = FSub(newForce, appliedForce);
+
+		if (globalError)
+			error.accumulateErrorLocal(deltaF, velMultiplier);
+
+		linVel0 = V3ScaleAdd(delLinVel0, deltaF, linVel0);
+		angState0 = V3ScaleAdd(raXn, FMul(deltaF, angDom0), angState0);
+
+		FStore(newForce, &forceBuffer[i]);
+
+		accumulatedNormalImpulse = FAdd(accumulatedNormalImpulse, newForce);
+	}
+
+	linVel0_ = linVel0;
+	angState0_ = angState0;
+
+	if (globalError)
+		globalError->combine(error);
+
+	return accumulatedNormalImpulse;
+}
+
+FloatV solveExtContacts(const SolverContactPointExt* PX_RESTRICT contacts, PxU32 nbContactPoints, const Vec3VArg contactNormal,
+	Vec3V& linVel0, Vec3V& angVel0,
+	Vec3V& linVel1, Vec3V& angVel1,
+	Vec3V& li0, Vec3V& ai0,
+	Vec3V& li1, Vec3V& ai1,
+	PxF32* PX_RESTRICT appliedForceBuffer,
+	Dy::ErrorAccumulator* PX_RESTRICT error);
+
+}
+
+}
+
+#endif
--- a/engine/third_party/physx/source/lowleveldynamics/src/DySolverContact.h
+++ b/engine/third_party/physx/source/lowleveldynamics/src/DySolverContact.h
@@ -0,0 +1,185 @@
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+//  * Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//  * Redistributions in binary form must reproduce the above copyright
+//    notice, this list of conditions and the following disclaimer in the
+//    documentation and/or other materials provided with the distribution.
+//  * Neither the name of NVIDIA CORPORATION nor the names of its
+//    contributors may be used to endorse or promote products derived
+//    from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.  
+
+#ifndef DY_SOLVER_CONTACT_H
+#define DY_SOLVER_CONTACT_H
+
+#include "foundation/PxSimpleTypes.h"
+#include "foundation/PxVec3.h"
+#include "PxvConfig.h"
+#include "foundation/PxVecMath.h"
+
+namespace physx
+{
+
+using namespace aos;
+
+namespace Sc
+{
+	class ShapeInteraction;
+}
+/**
+\brief A header to represent a friction patch for the solver.
+*/
+
+namespace Dy
+{
+
+struct SolverContactHeader
+{
+	enum DySolverContactFlags
+	{
+		eHAS_FORCE_THRESHOLDS = 0x1
+	};
+
+	PxU8	type;					//Note: mType should be first as the solver expects a type in the first byte.
+	PxU8	flags;	
+	PxU8	numNormalConstr;
+	PxU8	numFrictionConstr;					//4
+
+	PxReal	angDom0;							//8
+	PxReal	angDom1;							//12
+	PxReal	invMass0;							//16
+
+	Vec4V   staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W;		//32
+	//KS - minAppliedImpulseForFrictionW is non-zero only for articulations. This is a workaround for a case in articulations where
+	//the impulse is propagated such that many links do not apply friction because their normal forces were corrected by the solver in a previous
+	//link. This results in some links sliding unnaturally. This occurs with prismatic or revolute joints where the impulse propagation one one link
+	//resolves the normal constraint on all links
+	Vec4V	normal_minAppliedImpulseForFrictionW;							//48 
+																			
+	PxReal	invMass1;														//52
+	PxU32	broken;															//56
+	PxU8*	frictionBrokenWritebackByte;									//60	64
+	Sc::ShapeInteraction* shapeInteraction;									//64	72
+#if PX_P64_FAMILY
+	PxU32	pad[2];															//64	80
+#endif // PX_X64
+
+	PX_FORCE_INLINE void setStaticFriction(const FloatV f)	{ staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W = V4SetX(staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W, f);	}
+	PX_FORCE_INLINE void setDynamicFriction(const FloatV f)	{ staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W = V4SetY(staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W, f);	}
+	PX_FORCE_INLINE void setDominance0(const FloatV f)		{ staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W = V4SetZ(staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W, f);	}
+	PX_FORCE_INLINE void setDominance1(const FloatV f)		{ staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W = V4SetW(staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W, f);	}
+
+	PX_FORCE_INLINE FloatV getStaticFriction() const		{ return V4GetX(staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W);	}
+	PX_FORCE_INLINE FloatV getDynamicFriction() const		{ return V4GetY(staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W);	}
+	PX_FORCE_INLINE FloatV getDominance0() const			{ return V4GetZ(staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W);	}
+	PX_FORCE_INLINE FloatV getDominance1() const			{ return V4GetW(staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W);	}
+
+	PX_FORCE_INLINE void setStaticFriction(PxF32 f)			{ V4WriteX(staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W, f);	}
+	PX_FORCE_INLINE void setDynamicFriction(PxF32 f)		{ V4WriteY(staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W, f);	}
+	PX_FORCE_INLINE void setDominance0(PxF32 f)				{ V4WriteZ(staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W, f);	}
+	PX_FORCE_INLINE void setDominance1(PxF32 f)				{ V4WriteW(staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W, f);	}
+
+	PX_FORCE_INLINE PxF32 getStaticFrictionPxF32() const	{ return V4ReadX(staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W);	}
+	PX_FORCE_INLINE PxF32 getDynamicFrictionPxF32() const	{ return V4ReadY(staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W);	}
+	PX_FORCE_INLINE PxF32 getDominance0PxF32() const		{ return V4ReadZ(staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W);	}
+	PX_FORCE_INLINE PxF32 getDominance1PxF32() const		{ return V4ReadW(staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W);	}
+}; 
+
+#if !PX_P64_FAMILY
+PX_COMPILE_TIME_ASSERT(sizeof(SolverContactHeader) == 64);
+#else
+PX_COMPILE_TIME_ASSERT(sizeof(SolverContactHeader) == 80);
+#endif
+
+/**
+\brief A single rigid body contact point for the solver.
+*/
+struct SolverContactPoint
+{
+	Vec4V raXn_velMultiplierW;
+	Vec4V rbXn_maxImpulseW;
+
+	PxF32 biasedErr;
+	PxF32 unbiasedErr;
+	PxF32 impulseMultiplier;
+	PxU32 pad;
+
+	PX_FORCE_INLINE FloatV getVelMultiplier() const			{ return V4GetW(raXn_velMultiplierW);	}
+	PX_FORCE_INLINE FloatV getImpulseMultiplier() const		{ return FLoad(impulseMultiplier);		}
+
+	PX_FORCE_INLINE FloatV getBiasedErr() const				{ return FLoad(biasedErr);				}
+	PX_FORCE_INLINE FloatV getMaxImpulse() const			{ return V4GetW(rbXn_maxImpulseW);		}
+}; 
+
+PX_COMPILE_TIME_ASSERT(sizeof(SolverContactPoint) == 48);
+
+/**
+\brief A single extended articulation contact point for the solver.
+*/
+struct SolverContactPointExt : public SolverContactPoint
+{
+	Vec3V linDeltaVA;
+	Vec3V angDeltaVA;
+	Vec3V linDeltaVB;
+	Vec3V angDeltaVB;
+};
+
+PX_COMPILE_TIME_ASSERT(sizeof(SolverContactPointExt) == 112);
+
+
+/**
+\brief A single friction constraint for the solver.
+*/
+struct SolverContactFriction
+{
+	// PT: TODO: there's room for 3 floats in the padding bytes so we could just stick appliedForce / velMultiplier / bias there
+	// and avoid doing all the data packing / unpacking for these members...
+	Vec4V normalXYZ_appliedForceW;		//16
+	Vec4V raXnXYZ_velMultiplierW;		//32
+	Vec4V rbXnXYZ_biasW;				//48
+	PxReal targetVel;					//52
+	PxU32 mPad[3];						//64
+
+	PX_FORCE_INLINE void setAppliedForce(const FloatV f)	{ normalXYZ_appliedForceW = V4SetW(normalXYZ_appliedForceW,f);	}
+	PX_FORCE_INLINE void setBias(const FloatV f)			{ rbXnXYZ_biasW = V4SetW(rbXnXYZ_biasW,f);						}
+
+	PX_FORCE_INLINE Vec3V getNormal() const { return Vec3V_From_Vec4V(normalXYZ_appliedForceW); }
+	PX_FORCE_INLINE FloatV getAppliedForce() const { return V4GetW(normalXYZ_appliedForceW); }
+}; 
+
+PX_COMPILE_TIME_ASSERT(sizeof(SolverContactFriction) == 64);
+
+/**
+\brief A single extended articulation friction constraint for the solver.
+*/
+struct SolverContactFrictionExt : public SolverContactFriction
+{
+	Vec3V linDeltaVA;
+	Vec3V angDeltaVA;
+	Vec3V linDeltaVB;
+	Vec3V angDeltaVB;
+};
+PX_COMPILE_TIME_ASSERT(sizeof(SolverContactFrictionExt) == 128);
+
+}
+
+}
+
+#endif
--- a/engine/third_party/physx/source/lowleveldynamics/src/DySolverContact4.h
+++ b/engine/third_party/physx/source/lowleveldynamics/src/DySolverContact4.h
@@ -0,0 +1,170 @@
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+//  * Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//  * Redistributions in binary form must reproduce the above copyright
+//    notice, this list of conditions and the following disclaimer in the
+//    documentation and/or other materials provided with the distribution.
+//  * Neither the name of NVIDIA CORPORATION nor the names of its
+//    contributors may be used to endorse or promote products derived
+//    from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.  
+
+#ifndef DY_SOLVER_CONTACT4_H
+#define DY_SOLVER_CONTACT4_H
+
+#include "foundation/PxSimpleTypes.h"
+#include "foundation/PxVec3.h"
+#include "PxvConfig.h"
+#include "foundation/PxVecMath.h"
+#include "DySolverContact.h"
+
+namespace physx
+{
+
+namespace Sc
+{
+	class ShapeInteraction;
+}
+	
+namespace Dy
+{
+
+/**
+\brief Batched SOA contact data. Note, we don't support batching with extended contacts for the simple reason that handling multiple articulations would be complex.
+*/
+struct SolverContactHeader4
+{
+	enum
+	{
+		eHAS_MAX_IMPULSE = 1 << 0,
+		eHAS_TARGET_VELOCITY = 1 << 1
+	};
+
+	PxU8	type;					//Note: mType should be first as the solver expects a type in the first byte.
+	PxU8	numNormalConstr;
+	PxU8	numFrictionConstr;
+	PxU8	flag;
+
+	PxU8	flags[4];
+	//These counts are the max of the 4 sets of data.
+	//When certain pairs have fewer patches/contacts than others, they are padded with 0s so that no work is performed but 
+	//calculations are still shared (afterall, they're computationally free because we're doing 4 things at a time in SIMD)
+
+	//KS - used for write-back only
+	PxU8	numNormalConstr0, numNormalConstr1, numNormalConstr2, numNormalConstr3;
+	PxU8	numFrictionConstr0, numFrictionConstr1, numFrictionConstr2, numFrictionConstr3;			
+
+	Vec4V	restitution;																			
+	Vec4V   staticFriction;
+	Vec4V	dynamicFriction;
+	//Technically, these mass properties could be pulled out into a new structure and shared. For multi-manifold contacts,
+	//this would save 64 bytes per-manifold after the cost of the first manifold
+	Vec4V	invMass0D0;
+	Vec4V	invMass1D1;
+	Vec4V	angDom0;
+	Vec4V	angDom1;
+	//Normal is shared between all contacts in the batch. This will save some memory!
+	Vec4V	normalX;
+	Vec4V	normalY;
+	Vec4V	normalZ;
+
+	Sc::ShapeInteraction* shapeInteraction[4];		//192 or 208
+}; 
+
+#if !PX_P64_FAMILY
+PX_COMPILE_TIME_ASSERT(sizeof(SolverContactHeader4) == 192);
+#else
+PX_COMPILE_TIME_ASSERT(sizeof(SolverContactHeader4) == 208);
+#endif
+
+
+/**
+\brief This represents a batch of 4 contacts with static rolled into a single structure
+*/
+struct SolverContactBatchPointBase4
+{
+	Vec4V raXnX;
+	Vec4V raXnY;
+	Vec4V raXnZ;
+	Vec4V velMultiplier;
+	Vec4V scaledBias;
+	Vec4V biasedErr;
+	Vec4V impulseMultiplier;
+};
+PX_COMPILE_TIME_ASSERT(sizeof(SolverContactBatchPointBase4) == 112);
+
+/**
+\brief Contains the additional data required to represent 4 contacts between 2 dynamic bodies
+\see SolverContactBatchPointBase4
+*/
+struct SolverContactBatchPointDynamic4 : public SolverContactBatchPointBase4
+{	
+	Vec4V rbXnX;
+	Vec4V rbXnY;
+	Vec4V rbXnZ;
+}; 
+PX_COMPILE_TIME_ASSERT(sizeof(SolverContactBatchPointDynamic4) == 160);
+
+/**
+\brief This represents the shared information of a batch of 4 friction constraints
+*/
+struct SolverFrictionSharedData4
+{
+	BoolV broken;
+	PxU8* frictionBrokenWritebackByte[4];
+	Vec4V normalX[2];
+	Vec4V normalY[2];
+	Vec4V normalZ[2];
+};
+#if !PX_P64_FAMILY
+PX_COMPILE_TIME_ASSERT(sizeof(SolverFrictionSharedData4) == 128);
+#endif
+
+/**
+\brief This represents a batch of 4 friction constraints with static rolled into a single structure
+*/
+struct SolverContactFrictionBase4
+{
+	Vec4V raXnX;
+	Vec4V raXnY;
+	Vec4V raXnZ;
+	Vec4V scaledBias;
+	Vec4V velMultiplier;
+	Vec4V targetVelocity;
+};
+PX_COMPILE_TIME_ASSERT(sizeof(SolverContactFrictionBase4) == 96);
+
+/**
+\brief Contains the additional data required to represent 4 friction constraints between 2 dynamic bodies
+\see SolverContactFrictionBase4
+*/
+struct SolverContactFrictionDynamic4 : public SolverContactFrictionBase4
+{
+	Vec4V rbXnX;
+	Vec4V rbXnY;
+	Vec4V rbXnZ;
+}; 
+PX_COMPILE_TIME_ASSERT(sizeof(SolverContactFrictionDynamic4) == 144);
+
+}
+
+}
+
+#endif
--- a/engine/third_party/physx/source/lowleveldynamics/src/DySolverContext.h
+++ b/engine/third_party/physx/source/lowleveldynamics/src/DySolverContext.h
@@ -0,0 +1,68 @@
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+//  * Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//  * Redistributions in binary form must reproduce the above copyright
+//    notice, this list of conditions and the following disclaimer in the
+//    documentation and/or other materials provided with the distribution.
+//  * Neither the name of NVIDIA CORPORATION nor the names of its
+//    contributors may be used to endorse or promote products derived
+//    from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.  
+
+#ifndef DY_SOLVER_CONTEXT_H
+#define DY_SOLVER_CONTEXT_H
+
+#include "DyResidualAccumulator.h"
+
+namespace physx
+{
+	struct PxSolverBodyData;
+
+namespace Dy
+{
+	struct ThresholdStreamElement;
+	
+struct SolverContext
+{
+	bool									doFriction;
+	bool									writeBackIteration;
+	bool									isPositionIteration;
+
+	// for threshold stream output
+	ThresholdStreamElement*					mThresholdStream;
+	PxU32									mThresholdStreamIndex;
+	PxU32									mThresholdStreamLength;
+	PxSolverBodyData*						solverBodyArray;
+
+	ThresholdStreamElement* PX_RESTRICT		mSharedThresholdStream;
+	PxU32									mSharedThresholdStreamLength;
+	PxI32*									mSharedOutThresholdPairs;
+	Cm::SpatialVectorF*						deltaV; // used temporarily in PxcFsFlushVelocities
+
+	Dy::ErrorAccumulator*					contactErrorAccumulator;
+
+	SolverContext() : contactErrorAccumulator(NULL) { }
+};
+
+}
+
+}
+
+#endif
--- a/engine/third_party/physx/source/lowleveldynamics/src/DySolverControl.cpp
+++ b/engine/third_party/physx/source/lowleveldynamics/src/DySolverControl.cpp
@@ -0,0 +1,646 @@
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+//  * Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//  * Redistributions in binary form must reproduce the above copyright
+//    notice, this list of conditions and the following disclaimer in the
+//    documentation and/or other materials provided with the distribution.
+//  * Neither the name of NVIDIA CORPORATION nor the names of its
+//    contributors may be used to endorse or promote products derived
+//    from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.  
+
+#include "foundation/PxPreprocessor.h"
+#include "foundation/PxAtomic.h"
+#include "DySolverBody.h"
+#include "DyThresholdTable.h"
+#include "DySolverControl.h"
+#include "DyArticulationPImpl.h"
+#include "DySolverContext.h"
+#include "DyCpuGpuArticulation.h"
+
+namespace physx
+{
+namespace Dy
+{
+void solve1DBlock					(DY_PGS_SOLVE_METHOD_PARAMS);
+void solveContactBlock				(DY_PGS_SOLVE_METHOD_PARAMS);
+void solveExtContactBlock			(DY_PGS_SOLVE_METHOD_PARAMS);
+void solveExt1DBlock				(DY_PGS_SOLVE_METHOD_PARAMS);
+void solveContact_BStaticBlock		(DY_PGS_SOLVE_METHOD_PARAMS);
+void solveContactPreBlock			(DY_PGS_SOLVE_METHOD_PARAMS);
+void solveContactPreBlock_Static	(DY_PGS_SOLVE_METHOD_PARAMS);
+void solve1D4_Block					(DY_PGS_SOLVE_METHOD_PARAMS);
+
+void solve1DConcludeBlock				(DY_PGS_SOLVE_METHOD_PARAMS);
+void solveContactConcludeBlock			(DY_PGS_SOLVE_METHOD_PARAMS);
+void solveExtContactConcludeBlock		(DY_PGS_SOLVE_METHOD_PARAMS);
+void solveExt1DConcludeBlock			(DY_PGS_SOLVE_METHOD_PARAMS);
+void solveContact_BStaticConcludeBlock	(DY_PGS_SOLVE_METHOD_PARAMS);
+void solveContactPreBlock_Conclude		(DY_PGS_SOLVE_METHOD_PARAMS);
+void solveContactPreBlock_ConcludeStatic(DY_PGS_SOLVE_METHOD_PARAMS);
+void solve1D4Block_Conclude				(DY_PGS_SOLVE_METHOD_PARAMS);
+
+void solve1DBlockWriteBack				(DY_PGS_SOLVE_METHOD_PARAMS);
+void solveContactBlockWriteBack			(DY_PGS_SOLVE_METHOD_PARAMS);
+void solveExtContactBlockWriteBack		(DY_PGS_SOLVE_METHOD_PARAMS);
+void solveExt1DBlockWriteBack			(DY_PGS_SOLVE_METHOD_PARAMS);
+void solveContact_BStaticBlockWriteBack	(DY_PGS_SOLVE_METHOD_PARAMS);
+void solveContactPreBlock_WriteBack		(DY_PGS_SOLVE_METHOD_PARAMS);
+void solveContactPreBlock_WriteBackStatic(DY_PGS_SOLVE_METHOD_PARAMS);
+void solve1D4Block_WriteBack			(DY_PGS_SOLVE_METHOD_PARAMS);
+
+// PT: not sure what happened, these ones were declared but not actually used
+//void writeBack1DBlock				(DY_PGS_SOLVE_METHOD_PARAMS);
+//void contactBlockWriteBack		(DY_PGS_SOLVE_METHOD_PARAMS);
+//void extContactBlockWriteBack		(DY_PGS_SOLVE_METHOD_PARAMS);
+//void ext1DBlockWriteBack			(DY_PGS_SOLVE_METHOD_PARAMS);
+//void contactPreBlock_WriteBack	(DY_PGS_SOLVE_METHOD_PARAMS);
+//void writeBack1D4Block			(DY_PGS_SOLVE_METHOD_PARAMS);
+
+SolveBlockMethod gVTableSolveBlock[] PX_UNUSED_ATTRIBUTE = 
+{
+	0,
+	solveContactBlock,				// DY_SC_TYPE_RB_CONTACT
+	solve1DBlock,					// DY_SC_TYPE_RB_1D
+	solveExtContactBlock,			// DY_SC_TYPE_EXT_CONTACT
+	solveExt1DBlock,				// DY_SC_TYPE_EXT_1D
+	solveContact_BStaticBlock,		// DY_SC_TYPE_STATIC_CONTACT
+	solveContactPreBlock,			// DY_SC_TYPE_BLOCK_RB_CONTACT
+	solveContactPreBlock_Static,	// DY_SC_TYPE_BLOCK_STATIC_RB_CONTACT
+	solve1D4_Block,					// DY_SC_TYPE_BLOCK_1D,
+};
+
+SolveWriteBackBlockMethod gVTableSolveWriteBackBlock[] PX_UNUSED_ATTRIBUTE = 
+{
+	0,
+	solveContactBlockWriteBack,				// DY_SC_TYPE_RB_CONTACT
+	solve1DBlockWriteBack,					// DY_SC_TYPE_RB_1D
+	solveExtContactBlockWriteBack,			// DY_SC_TYPE_EXT_CONTACT
+	solveExt1DBlockWriteBack,				// DY_SC_TYPE_EXT_1D
+	solveContact_BStaticBlockWriteBack,		// DY_SC_TYPE_STATIC_CONTACT
+	solveContactPreBlock_WriteBack,			// DY_SC_TYPE_BLOCK_RB_CONTACT
+	solveContactPreBlock_WriteBackStatic,	// DY_SC_TYPE_BLOCK_STATIC_RB_CONTACT
+	solve1D4Block_WriteBack,				// DY_SC_TYPE_BLOCK_1D,
+};
+
+SolveBlockMethod gVTableSolveConcludeBlock[] PX_UNUSED_ATTRIBUTE = 
+{
+	0,
+	solveContactConcludeBlock,				// DY_SC_TYPE_RB_CONTACT
+	solve1DConcludeBlock,					// DY_SC_TYPE_RB_1D
+	solveExtContactConcludeBlock,			// DY_SC_TYPE_EXT_CONTACT
+	solveExt1DConcludeBlock,				// DY_SC_TYPE_EXT_1D
+	solveContact_BStaticConcludeBlock,		// DY_SC_TYPE_STATIC_CONTACT
+	solveContactPreBlock_Conclude,			// DY_SC_TYPE_BLOCK_RB_CONTACT
+	solveContactPreBlock_ConcludeStatic,	// DY_SC_TYPE_BLOCK_STATIC_RB_CONTACT
+	solve1D4Block_Conclude,					// DY_SC_TYPE_BLOCK_1D,
+};
+
+void solveV_Blocks(SolverIslandParams& params, bool solveFrictionEveryIteration)
+{
+	const PxF32 biasCoefficient = DY_ARTICULATION_PGS_BIAS_COEFFICIENT;
+	const bool isTGS = false;
+	const bool residualReportingActive = params.errorAccumulator != NULL;
+
+	const PxI32 TempThresholdStreamSize = 32;
+	ThresholdStreamElement tempThresholdStream[TempThresholdStreamSize];
+
+	SolverContext cache;
+	cache.solverBodyArray			= params.bodyDataList;
+	cache.mThresholdStream			= tempThresholdStream;
+	cache.mThresholdStreamLength	= TempThresholdStreamSize;
+	cache.mThresholdStreamIndex		= 0;
+	cache.writeBackIteration		= false;
+	cache.deltaV					= params.deltaV;
+
+	const PxI32 batchCount = PxI32(params.numConstraintHeaders);
+
+	const PxSolverBody* PX_RESTRICT bodyListStart = params.bodyListStart;
+	const PxU32 bodyListSize = params.bodyListSize;
+
+	Cm::SpatialVector* PX_RESTRICT motionVelocityArray = params.motionVelocityArray;
+
+	const PxU32 velocityIterations = params.velocityIterations;
+	const PxU32 positionIterations = params.positionIterations;
+
+	const PxU32 numConstraintHeaders = params.numConstraintHeaders;
+	const PxU32 articulationListSize = params.articulationListSize;
+
+	ArticulationSolverDesc* PX_RESTRICT articulationListStart = params.articulationListStart;
+
+	PX_ASSERT(positionIterations >= 1);
+
+	if(numConstraintHeaders == 0)
+	{
+		solveNoContactsCase(bodyListSize, bodyListStart, motionVelocityArray,
+							articulationListSize, articulationListStart, cache.deltaV,
+							positionIterations, velocityIterations, params.dt, params.invDt, residualReportingActive);
+		return;
+	}
+
+	BatchIterator contactIterator(params.constraintBatchHeaders, params.numConstraintHeaders);
+
+	const PxSolverConstraintDesc* PX_RESTRICT constraintList = params.constraintList;
+
+	//0-(n-1) iterations
+	PxI32 normalIter = 0;
+
+	cache.isPositionIteration = true;
+	cache.contactErrorAccumulator = residualReportingActive ? &params.errorAccumulator->mPositionIterationErrorAccumulator : NULL;
+	for (PxU32 iteration = positionIterations; iteration > 0; iteration--)	//decreasing positive numbers == position iters
+	{
+		if (cache.contactErrorAccumulator)
+			cache.contactErrorAccumulator->reset();
+
+		cache.doFriction = solveFrictionEveryIteration ? true : iteration <= 3;
+
+		SolveBlockParallel(constraintList, batchCount, normalIter * batchCount, batchCount, 
+			cache, contactIterator, iteration == 1 ? gVTableSolveConcludeBlock : gVTableSolveBlock, normalIter);
+
+		for (PxU32 i = 0; i < articulationListSize; ++i)
+			articulationListStart[i].articulation->solveInternalConstraints(params.dt, params.dt, params.invDt, false, isTGS, 0.f, biasCoefficient, residualReportingActive);
+
+		++normalIter;
+	}
+
+	saveMotionVelocities(bodyListSize, bodyListStart, motionVelocityArray);
+	
+	for (PxU32 i = 0; i < articulationListSize; i++)
+		ArticulationPImpl::saveVelocity(articulationListStart[i].articulation, cache.deltaV);
+
+	const PxI32 velItersMinOne = (PxI32(velocityIterations)) - 1;
+
+	cache.isPositionIteration = false;
+	cache.contactErrorAccumulator = residualReportingActive ? &params.errorAccumulator->mVelocityIterationErrorAccumulator : NULL;
+	for(PxI32 iteration = 0; iteration < velItersMinOne; ++iteration)
+	{
+		if (cache.contactErrorAccumulator)
+			cache.contactErrorAccumulator->reset();
+
+		SolveBlockParallel(constraintList, batchCount, normalIter * batchCount, batchCount, 
+			cache, contactIterator, gVTableSolveBlock, normalIter);
+
+		for (PxU32 i = 0; i < articulationListSize; ++i)
+			articulationListStart[i].articulation->solveInternalConstraints(params.dt, params.dt, params.invDt, true, isTGS, 0.f, biasCoefficient, residualReportingActive);
+		++normalIter;
+	}
+
+	PxI32* outThresholdPairs = params.outThresholdPairs;
+	ThresholdStreamElement* PX_RESTRICT thresholdStream = params.thresholdStream;
+	PxU32 thresholdStreamLength = params.thresholdStreamLength;
+
+	cache.writeBackIteration = true;	
+	cache.mSharedThresholdStream = thresholdStream;
+	cache.mSharedThresholdStreamLength = thresholdStreamLength;
+	cache.mSharedOutThresholdPairs = outThresholdPairs;
+	//PGS solver always runs at least one velocity iteration (otherwise writeback won't happen)
+	{
+		if (cache.contactErrorAccumulator)
+			cache.contactErrorAccumulator->reset();
+
+		SolveBlockParallel(constraintList, batchCount, normalIter * batchCount, batchCount, 
+			cache, contactIterator, gVTableSolveWriteBackBlock, normalIter);
+
+		for (PxU32 i = 0; i < articulationListSize; ++i)
+		{
+			articulationListStart[i].articulation->solveInternalConstraints(params.dt, params.dt, params.invDt, true, isTGS, 0.f, biasCoefficient, residualReportingActive);
+			articulationListStart[i].articulation->writebackInternalConstraints(false);
+		}
+
+		++normalIter;
+	}
+
+	//Write back remaining threshold streams
+	if(cache.mThresholdStreamIndex > 0)
+	{
+		//Write back to global buffer
+		const PxI32 threshIndex = PxAtomicAdd(outThresholdPairs, PxI32(cache.mThresholdStreamIndex)) - PxI32(cache.mThresholdStreamIndex);
+		for(PxU32 b = 0; b < cache.mThresholdStreamIndex; ++b)
+		{
+			thresholdStream[b + threshIndex] = cache.mThresholdStream[b];
+		}
+		cache.mThresholdStreamIndex = 0;
+	}
+}
+
+void solveVParallelAndWriteBack(SolverIslandParams& params, Cm::SpatialVectorF* deltaV, Dy::ErrorAccumulatorEx* errorAccumulator,
+	bool solveFrictionEveryIteration)
+{
+#if PX_PROFILE_SOLVE_STALLS
+	PxU64 startTime = readTimer();
+
+	PxU64 stallCount = 0;
+#endif
+	const PxF32 biasCoefficient = DY_ARTICULATION_PGS_BIAS_COEFFICIENT;
+	const bool isTGS = false;
+	const bool residualReportingActive = errorAccumulator != NULL;
+
+	SolverContext cache;
+	cache.solverBodyArray = params.bodyDataList;
+	const PxU32 batchSize = params.batchSize;
+
+	const PxI32 UnrollCount = PxI32(batchSize);
+	const PxI32 ArticCount = 2;
+	const PxI32 SaveUnrollCount = 32;
+
+	const PxI32 TempThresholdStreamSize = 32;
+	ThresholdStreamElement tempThresholdStream[TempThresholdStreamSize];
+
+	const PxI32 bodyListSize = PxI32(params.bodyListSize);
+	const PxI32 articulationListSize = PxI32(params.articulationListSize);
+
+	const PxI32 batchCount = PxI32(params.numConstraintHeaders);
+	cache.mThresholdStream = tempThresholdStream;
+	cache.mThresholdStreamLength = TempThresholdStreamSize;
+	cache.mThresholdStreamIndex = 0;
+	cache.writeBackIteration = false;
+	cache.deltaV = deltaV;
+
+	const PxReal dt = params.dt;
+	const PxReal invDt = params.invDt;
+
+	const PxI32 positionIterations = PxI32(params.positionIterations);
+
+	PxI32* constraintIndex = &params.constraintIndex; // counter for distributing constraints to tasks, incremented before they're solved
+	PxI32* constraintIndexCompleted = &params.constraintIndexCompleted; // counter for completed constraints, incremented after they're solved
+
+	PxI32* articIndex = &params.articSolveIndex;
+	PxI32* articIndexCompleted = &params.articSolveIndexCompleted;
+
+	const PxSolverConstraintDesc* PX_RESTRICT constraintList = params.constraintList;
+
+	const ArticulationSolverDesc* PX_RESTRICT articulationListStart = params.articulationListStart;
+
+	const PxU32 nbPartitions = params.nbPartitions;	
+
+	const PxU32* headersPerPartition = params.headersPerPartition;
+
+	PX_ASSERT(positionIterations >= 1);
+
+	PxI32 endIndexCount = UnrollCount;
+	PxI32 index = PxAtomicAdd(constraintIndex, UnrollCount) - UnrollCount;
+
+	PxI32 articSolveStart = 0;
+	PxI32 articSolveEnd = 0;
+	PxI32 maxArticIndex = 0;
+	PxI32 articIndexCounter = 0;
+	
+	BatchIterator contactIter(params.constraintBatchHeaders, params.numConstraintHeaders);
+
+	PxI32 maxNormalIndex = 0;
+	PxI32 normalIteration = 0;
+	PxU32 a = 0;
+	PxI32 targetConstraintIndex = 0;
+	PxI32 targetArticIndex = 0;
+	
+	cache.contactErrorAccumulator = residualReportingActive ? &errorAccumulator->mPositionIterationErrorAccumulator : NULL;
+	cache.isPositionIteration = true;
+	for(PxU32 i = 0; i < 2; ++i)
+	{
+		SolveBlockMethod* solveTable = i == 0 ? gVTableSolveBlock : gVTableSolveConcludeBlock;
+		for(; a < positionIterations - 1 + i; ++a)
+		{
+			WAIT_FOR_PROGRESS(articIndexCompleted, targetArticIndex); // wait for arti solve of previous iteration
+
+			if (i == 0 && cache.contactErrorAccumulator)
+				cache.contactErrorAccumulator->reset();
+
+			cache.doFriction = solveFrictionEveryIteration ? true : (positionIterations - a) <= 3;
+			for(PxU32 b = 0; b < nbPartitions; ++b)
+			{
+				WAIT_FOR_PROGRESS(constraintIndexCompleted, targetConstraintIndex); // wait for rigid solve of previous partition
+
+				maxNormalIndex += headersPerPartition[b];
+				
+				PxI32 nbSolved = 0;
+				while(index < maxNormalIndex)
+				{
+					const PxI32 remainder = PxMin(maxNormalIndex - index, endIndexCount);
+					SolveBlockParallel(constraintList, remainder, index, batchCount, cache, contactIter, solveTable, 
+						normalIteration);
+					index += remainder;
+					endIndexCount -= remainder;
+					nbSolved += remainder;
+					if(endIndexCount == 0)
+					{
+						endIndexCount = UnrollCount;
+						index = PxAtomicAdd(constraintIndex, UnrollCount) - UnrollCount;
+					}
+				}
+				if(nbSolved)
+				{
+					PxMemoryBarrier();
+					PxAtomicAdd(constraintIndexCompleted, nbSolved);
+				}
+				targetConstraintIndex += headersPerPartition[b]; //Increment target constraint index by batch count
+			}
+
+			WAIT_FOR_PROGRESS(constraintIndexCompleted, targetConstraintIndex); // wait for all rigid partitions to be done
+
+			maxArticIndex += articulationListSize;
+			targetArticIndex += articulationListSize;
+
+			while (articSolveStart < maxArticIndex)
+			{
+				const PxI32 endIdx = PxMin(articSolveEnd, maxArticIndex);
+
+				PxI32 nbSolved = 0;
+				while (articSolveStart < endIdx)
+				{
+					articulationListStart[articSolveStart - articIndexCounter].articulation->solveInternalConstraints(dt, dt, invDt, false, isTGS, 0.f, biasCoefficient, residualReportingActive);
+					articSolveStart++;
+					nbSolved++;
+				}
+
+				if (nbSolved)
+				{
+					PxMemoryBarrier();
+					PxAtomicAdd(articIndexCompleted, nbSolved);
+				}
+
+				const PxI32 remaining = articSolveEnd - articSolveStart;
+
+				if (remaining == 0)
+				{
+					articSolveStart = PxAtomicAdd(articIndex, ArticCount) - ArticCount;
+					articSolveEnd = articSolveStart + ArticCount;
+				}
+			}
+
+			articIndexCounter += articulationListSize;
+
+			++normalIteration;
+		}
+	}
+
+	PxI32* bodyListIndex = &params.bodyListIndex;
+	PxI32* bodyListIndexCompleted = &params.bodyListIndexCompleted;
+
+	const PxSolverBody* PX_RESTRICT bodyListStart = params.bodyListStart;
+	Cm::SpatialVector* PX_RESTRICT motionVelocityArray = params.motionVelocityArray;
+
+	//Save velocity - articulated
+	PxI32 endIndexCount2 = SaveUnrollCount;
+	PxI32 index2 = PxAtomicAdd(bodyListIndex, SaveUnrollCount) - SaveUnrollCount;
+	{
+		WAIT_FOR_PROGRESS(articIndexCompleted, targetArticIndex); // wait for all articulation solves before saving velocity
+		WAIT_FOR_PROGRESS(constraintIndexCompleted, targetConstraintIndex); // wait for all rigid partition solves before saving velocity
+		PxI32 nbConcluded = 0;
+		while(index2 < articulationListSize)
+		{
+			const PxI32 remainder = PxMin(SaveUnrollCount, (articulationListSize - index2));
+			endIndexCount2 -= remainder;
+			for(PxI32 b = 0; b < remainder; ++b, ++index2)
+			{
+				ArticulationPImpl::saveVelocity(articulationListStart[index2].articulation, cache.deltaV);
+			}
+			if(endIndexCount2 == 0)
+			{
+				index2 = PxAtomicAdd(bodyListIndex, SaveUnrollCount) - SaveUnrollCount;
+				endIndexCount2 = SaveUnrollCount;
+			}
+			nbConcluded += remainder;
+		}
+
+		index2 -= articulationListSize;
+
+		//save velocity
+		
+		while(index2 < bodyListSize)
+		{
+			const PxI32 remainder = PxMin(endIndexCount2, (bodyListSize - index2));
+			endIndexCount2 -= remainder;
+
+			saveMotionVelocities(remainder, bodyListStart + index2, motionVelocityArray + index2);
+			index2 += remainder;
+
+			nbConcluded += remainder;
+			
+			//Branch not required because this is the last time we use this atomic variable
+			//if(index2 < articulationListSizePlusbodyListSize)
+			{
+				index2 = PxAtomicAdd(bodyListIndex, SaveUnrollCount) - SaveUnrollCount - articulationListSize;
+				endIndexCount2 = SaveUnrollCount;
+			}
+		}
+
+		if(nbConcluded)
+		{
+			PxMemoryBarrier();
+			PxAtomicAdd(bodyListIndexCompleted, nbConcluded);
+		}
+	}
+
+	WAIT_FOR_PROGRESS(bodyListIndexCompleted, (bodyListSize + articulationListSize)); // wait for all velocity saves to be done
+	
+	cache.contactErrorAccumulator = residualReportingActive ? &errorAccumulator->mVelocityIterationErrorAccumulator : NULL;
+	cache.isPositionIteration = false;
+
+	a = 1;
+	for(; a < params.velocityIterations; ++a)
+	{
+		WAIT_FOR_PROGRESS(articIndexCompleted, targetArticIndex); // wait for arti solve of previous iteration
+		
+		if (residualReportingActive)
+			cache.contactErrorAccumulator->reset();
+		
+		for(PxU32 b = 0; b < nbPartitions; ++b)
+		{
+			WAIT_FOR_PROGRESS(constraintIndexCompleted, targetConstraintIndex); // wait for rigid solve of previous partition
+
+			maxNormalIndex += headersPerPartition[b];
+			
+			PxI32 nbSolved = 0;
+			while(index < maxNormalIndex)
+			{
+				const PxI32 remainder = PxMin(maxNormalIndex - index, endIndexCount);
+				SolveBlockParallel(constraintList, remainder, index, batchCount, cache, contactIter, gVTableSolveBlock, 
+					normalIteration);
+				index += remainder;
+				endIndexCount -= remainder;
+				nbSolved += remainder;
+				if(endIndexCount == 0)
+				{
+					endIndexCount = UnrollCount;
+					index = PxAtomicAdd(constraintIndex, UnrollCount) - UnrollCount;
+				}
+			}
+			if(nbSolved)
+			{
+				PxMemoryBarrier();
+				PxAtomicAdd(constraintIndexCompleted, nbSolved);
+			}
+			targetConstraintIndex += headersPerPartition[b]; //Increment target constraint index by batch count
+		}
+
+		WAIT_FOR_PROGRESS(constraintIndexCompleted, targetConstraintIndex); // wait for all rigid partitions to be done
+
+		maxArticIndex += articulationListSize;
+		targetArticIndex += articulationListSize;
+
+		while (articSolveStart < maxArticIndex)
+		{
+			const PxI32 endIdx = PxMin(articSolveEnd, maxArticIndex);
+
+			PxI32 nbSolved = 0;
+			while (articSolveStart < endIdx)
+			{
+				articulationListStart[articSolveStart - articIndexCounter].articulation->solveInternalConstraints(dt, dt, invDt, true, isTGS, 0.f, biasCoefficient, residualReportingActive);
+				articSolveStart++;
+				nbSolved++;
+			}
+
+			if (nbSolved)
+			{
+				PxMemoryBarrier();
+				PxAtomicAdd(articIndexCompleted, nbSolved);
+			}
+
+			const PxI32 remaining = articSolveEnd - articSolveStart;
+
+			if (remaining == 0)
+			{
+				articSolveStart = PxAtomicAdd(articIndex, ArticCount) - ArticCount;
+				articSolveEnd = articSolveStart + ArticCount;
+			}
+		}
+		++normalIteration;
+		articIndexCounter += articulationListSize;
+	}
+
+	ThresholdStreamElement* PX_RESTRICT thresholdStream = params.thresholdStream;
+	PxU32 thresholdStreamLength = params.thresholdStreamLength;
+	PxI32* outThresholdPairs = params.outThresholdPairs;
+
+	cache.mSharedOutThresholdPairs = outThresholdPairs;
+	cache.mSharedThresholdStream = thresholdStream;
+	cache.mSharedThresholdStreamLength = thresholdStreamLength;
+
+	//Last iteration - do writeback as well!
+	cache.writeBackIteration = true;
+	{
+		WAIT_FOR_PROGRESS(articIndexCompleted, targetArticIndex); // wait for arti velocity iterations to be done
+		
+		if (residualReportingActive)
+			cache.contactErrorAccumulator->reset();
+		
+		for(PxU32 b = 0; b < nbPartitions; ++b)
+		{
+			WAIT_FOR_PROGRESS(constraintIndexCompleted, targetConstraintIndex); // wait for rigid partition velocity iterations to be done resp. previous partition writeback iteration
+
+			maxNormalIndex += headersPerPartition[b];
+			
+			PxI32 nbSolved = 0;
+			while(index < maxNormalIndex)
+			{
+				const PxI32 remainder = PxMin(maxNormalIndex - index, endIndexCount);
+
+				SolveBlockParallel(constraintList, remainder, index, batchCount, cache, contactIter, gVTableSolveWriteBackBlock, 
+					normalIteration);
+
+				index += remainder;
+				endIndexCount -= remainder;
+				nbSolved += remainder;
+				if(endIndexCount == 0)
+				{
+					endIndexCount = UnrollCount;
+					index = PxAtomicAdd(constraintIndex, UnrollCount) - UnrollCount;
+				}
+			}
+			if(nbSolved)
+			{
+				PxMemoryBarrier();
+				PxAtomicAdd(constraintIndexCompleted, nbSolved);
+			}
+			targetConstraintIndex += headersPerPartition[b]; //Increment target constraint index by batch count
+		}
+		{
+			WAIT_FOR_PROGRESS(constraintIndexCompleted, targetConstraintIndex); // wait for rigid partitions writeback iterations to be done
+
+			maxArticIndex += articulationListSize;
+			targetArticIndex += articulationListSize;
+
+			while (articSolveStart < maxArticIndex)
+			{
+				const PxI32 endIdx = PxMin(articSolveEnd, maxArticIndex);
+
+				PxI32 nbSolved = 0;
+				while (articSolveStart < endIdx)
+				{
+					articulationListStart[articSolveStart - articIndexCounter].articulation->solveInternalConstraints(dt, dt, invDt, false, isTGS, 0.f, biasCoefficient, residualReportingActive);
+					articulationListStart[articSolveStart - articIndexCounter].articulation->writebackInternalConstraints(false);
+					articSolveStart++;
+					nbSolved++;
+				}
+
+				if (nbSolved)
+				{
+					PxMemoryBarrier();
+					PxAtomicAdd(articIndexCompleted, nbSolved);
+				}
+
+				PxI32 remaining = articSolveEnd - articSolveStart;
+
+				if (remaining == 0)
+				{
+					articSolveStart = PxAtomicAdd(articIndex, ArticCount) - ArticCount;
+					articSolveEnd = articSolveStart + ArticCount;
+				}
+			}
+
+			articIndexCounter += articulationListSize; // not strictly necessary but better safe than sorry
+			WAIT_FOR_PROGRESS(articIndexCompleted, targetArticIndex); // wait for arti solve+writeback to be done
+		}
+
+		// At this point we've awaited the completion all rigid partitions and all articulations
+		// No more syncing on the outside of this function is required.
+
+		if(cache.mThresholdStreamIndex > 0)
+		{
+			//Write back to global buffer
+			PxI32 threshIndex = PxAtomicAdd(outThresholdPairs, PxI32(cache.mThresholdStreamIndex)) - PxI32(cache.mThresholdStreamIndex);
+			for(PxU32 b = 0; b < cache.mThresholdStreamIndex; ++b)
+			{
+				thresholdStream[b + threshIndex] = cache.mThresholdStream[b];
+			}
+			cache.mThresholdStreamIndex = 0;
+		}
+
+		++normalIteration;
+	}
+
+#if PX_PROFILE_SOLVE_STALLS
+	PxU64 endTime = readTimer();
+	PxReal totalTime = (PxReal)(endTime - startTime);
+	PxReal stallTime = (PxReal)stallCount;
+	PxReal stallRatio = stallTime/totalTime;
+	if(0)//stallRatio > 0.2f)
+	{
+		LARGE_INTEGER frequency;
+		QueryPerformanceFrequency( &frequency );
+		printf("Warning -- percentage time stalled = %f; stalled for %f seconds; total Time took %f seconds\n", 
+			stallRatio * 100.f, stallTime/(PxReal)frequency.QuadPart, totalTime/(PxReal)frequency.QuadPart);
+	}
+#endif
+}
+
+}
+}
+
--- a/engine/third_party/physx/source/lowleveldynamics/src/DySolverControl.h
+++ b/engine/third_party/physx/source/lowleveldynamics/src/DySolverControl.h
@@ -0,0 +1,54 @@
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+//  * Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//  * Redistributions in binary form must reproduce the above copyright
+//    notice, this list of conditions and the following disclaimer in the
+//    documentation and/or other materials provided with the distribution.
+//  * Neither the name of NVIDIA CORPORATION nor the names of its
+//    contributors may be used to endorse or promote products derived
+//    from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.  
+
+#ifndef DY_SOLVER_CONTROL_H
+#define DY_SOLVER_CONTROL_H
+
+#include "DySolverCore.h"
+
+namespace physx
+{
+namespace Dy
+{
+
+/*
+solves dual problem exactly by GS-iterating until convergence stops
+only uses regular velocity vector for storing results, and backs up initial state, which is restored.
+the solution forces are saved in a vector.
+
+state should not be stored, this function is safe to call from multiple threads.
+*/
+void solveVParallelAndWriteBack(SolverIslandParams& params, Cm::SpatialVectorF* deltaV, Dy::ErrorAccumulatorEx* errorAccumulator, 
+	bool solveFrictionEveryIteration);
+
+void solveV_Blocks(SolverIslandParams& params, bool solveFrictionEveryIteration);
+
+}
+}
+
+#endif
--- a/engine/third_party/physx/source/lowleveldynamics/src/DySolverCore.cpp
+++ b/engine/third_party/physx/source/lowleveldynamics/src/DySolverCore.cpp
@@ -0,0 +1,83 @@
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+//  * Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//  * Redistributions in binary form must reproduce the above copyright
+//    notice, this list of conditions and the following disclaimer in the
+//    documentation and/or other materials provided with the distribution.
+//  * Neither the name of NVIDIA CORPORATION nor the names of its
+//    contributors may be used to endorse or promote products derived
+//    from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.  
+
+#include "foundation/PxPreprocessor.h"
+#include "DySolverCore.h"
+#include "DyArticulationPImpl.h"
+#include "DyCpuGpuArticulation.h"
+
+using namespace physx;
+using namespace aos;
+using namespace Dy;
+
+// PT: TODO: ideally we could reuse this in immediate mode as well, but the code currently uses separate arrays of PxVec3s instead of
+// spatial vectors so the SIMD code won't work there. Switching to spatial vectors requires a change in the immediate mode API (PxSolveConstraints).
+void Dy::saveMotionVelocities(PxU32 nbBodies, const PxSolverBody* PX_RESTRICT solverBodies, Cm::SpatialVector* PX_RESTRICT motionVelocityArray)
+{
+	for(PxU32 i=0; i<nbBodies; i++)
+	{
+		Cm::SpatialVector& motionVel = motionVelocityArray[i];
+		const PxSolverBody& body = solverBodies[i];
+
+		V4StoreA(V4LoadA(&body.linearVelocity.x), &motionVel.linear.x);
+		V4StoreA(V4LoadA(&body.angularState.x), &motionVel.angular.x);
+
+		PX_ASSERT(motionVel.linear.isFinite());
+		PX_ASSERT(motionVel.angular.isFinite());
+	}
+}
+
+// PT: this case is reached when e.g. a lot of objects falling but not touching yet. So there are no contacts but potentially a lot of bodies.
+// See LegacyBenchmark/falling_spheres for example.
+void Dy::solveNoContactsCase(	PxU32 nbBodies, const PxSolverBody* PX_RESTRICT solverBodies, Cm::SpatialVector* PX_RESTRICT motionVelocityArray,
+								PxU32 nbArticulations, ArticulationSolverDesc* PX_RESTRICT articulationListStart, Cm::SpatialVectorF* PX_RESTRICT deltaV,
+								PxU32 nbPosIter, PxU32 nbVelIter, PxF32 dt, PxF32 invDt, bool residualReportingActive)
+{
+	saveMotionVelocities(nbBodies, solverBodies, motionVelocityArray);
+
+	if(!nbArticulations)
+		return;
+
+	const PxF32 biasCoefficient = DY_ARTICULATION_PGS_BIAS_COEFFICIENT;
+	const bool isTGS = false;
+
+	//Even thought there are no external constraints, there may still be internal constraints in the articulations...
+	for(PxU32 i=0; i<nbPosIter; i++)
+		for(PxU32 j=0; j<nbArticulations; j++)
+			articulationListStart[j].articulation->solveInternalConstraints(dt, dt, invDt, false, isTGS, 0.0f, biasCoefficient, residualReportingActive);
+
+	for(PxU32 i=0; i<nbArticulations; i++)
+		ArticulationPImpl::saveVelocity(articulationListStart[i].articulation, deltaV);
+
+	for(PxU32 i=0; i<nbVelIter; i++)
+		for(PxU32 j=0; j<nbArticulations; j++)
+			articulationListStart[j].articulation->solveInternalConstraints(dt, dt, invDt, true, isTGS, 0.0f, biasCoefficient, residualReportingActive);
+
+	for(PxU32 j=0; j<nbArticulations; j++)
+		articulationListStart[j].articulation->writebackInternalConstraints(isTGS);
+}
--- a/engine/third_party/physx/source/lowleveldynamics/src/DySolverCore.h
+++ b/engine/third_party/physx/source/lowleveldynamics/src/DySolverCore.h
@@ -0,0 +1,257 @@
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+//  * Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//  * Redistributions in binary form must reproduce the above copyright
+//    notice, this list of conditions and the following disclaimer in the
+//    documentation and/or other materials provided with the distribution.
+//  * Neither the name of NVIDIA CORPORATION nor the names of its
+//    contributors may be used to endorse or promote products derived
+//    from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.  
+
+#ifndef DY_SOLVER_CORE_H
+#define DY_SOLVER_CORE_H
+
+#include "PxvConfig.h"
+#include "foundation/PxArray.h"
+#include "foundation/PxThread.h"
+#include "foundation/PxUserAllocated.h"
+#include "CmSpatialVector.h"
+#include "DySolverConstraintDesc.h"
+#include "DyResidualAccumulator.h"
+// PT: it is not wrong to include DyPGS.h here because the SolverCore class is actually only used by PGS.
+// (for patch friction). TGS doesn't use the same architecture / class hierarchy.
+#include "DyPGS.h"
+
+namespace physx
+{
+struct PxSolverBody;
+struct PxSolverBodyData;
+struct PxSolverConstraintDesc;
+struct PxConstraintBatchHeader;
+class PxsRigidBody;
+struct PxsBodyCore;
+
+namespace Dy
+{
+struct ThresholdStreamElement;
+struct ArticulationSolverDesc;
+
+#define PX_PROFILE_SOLVE_STALLS 0
+#if PX_PROFILE_SOLVE_STALLS
+#if PX_WINDOWS
+#include "foundation/windows/PxWindowsInclude.h"
+
+PX_FORCE_INLINE PxU64 readTimer()
+{
+	//return __rdtsc();
+
+	LARGE_INTEGER i;
+	QueryPerformanceCounter(&i);
+	return i.QuadPart;
+}
+
+#endif
+#endif
+
+#define YIELD_THREADS 1
+
+#if YIELD_THREADS
+
+#define ATTEMPTS_BEFORE_BACKOFF 30000
+#define ATTEMPTS_BEFORE_RETEST 10000
+
+#endif
+
+PX_INLINE void WaitForProgressCount(volatile PxI32* pGlobalIndex, const PxI32 targetIndex)
+{
+#if YIELD_THREADS
+	if(*pGlobalIndex < targetIndex)
+	{
+		bool satisfied = false;
+		PxU32 count = ATTEMPTS_BEFORE_BACKOFF;
+		do
+		{
+			satisfied = true;
+			while(*pGlobalIndex < targetIndex)
+			{
+				if(--count == 0)
+				{
+					satisfied = false;
+					break;
+				}
+			}
+			if(!satisfied)
+				PxThread::yield();
+			count = ATTEMPTS_BEFORE_RETEST;
+		}
+		while(!satisfied);
+	}
+#else
+	while(*pGlobalIndex < targetIndex);
+#endif
+}
+
+#if PX_PROFILE_SOLVE_STALLS
+PX_INLINE void WaitForProgressCount(volatile PxI32* pGlobalIndex, const PxI32 targetIndex, PxU64& stallTime)
+{
+	if(*pGlobalIndex < targetIndex)
+	{
+		bool satisfied = false;
+		PxU32 count = ATTEMPTS_BEFORE_BACKOFF;
+		do
+		{
+			satisfied = true;
+			PxU64 startTime = readTimer();
+			while(*pGlobalIndex < targetIndex)
+			{
+				if(--count == 0)
+				{
+					satisfied = false;
+					break;
+				}
+			}
+			PxU64 endTime = readTimer();
+			stallTime += (endTime - startTime);
+			if(!satisfied)
+				PxThread::yield();
+			count = ATTEMPTS_BEFORE_BACKOFF;
+		}
+		while(!satisfied);
+	}
+}
+
+#define WAIT_FOR_PROGRESS(pGlobalIndex, targetIndex) if(*pGlobalIndex < targetIndex) WaitForProgressCount(pGlobalIndex, targetIndex, stallCount)
+#else
+#define WAIT_FOR_PROGRESS(pGlobalIndex, targetIndex) if(*pGlobalIndex < targetIndex) WaitForProgressCount(pGlobalIndex, targetIndex)
+#endif
+#define WAIT_FOR_PROGRESS_NO_TIMER(pGlobalIndex, targetIndex) if(*pGlobalIndex < targetIndex) WaitForProgressCount(pGlobalIndex, targetIndex)
+
+struct SolverIslandParams
+{
+	//Default friction model params
+	PxU32 positionIterations;
+	PxU32 velocityIterations;
+	const PxSolverBody* bodyListStart;
+	PxSolverBodyData* bodyDataList;
+	PxU32 bodyListSize;
+	PxU32 solverBodyOffset;	// PT: not really needed by the solvers themselves, only by the integration code
+	ArticulationSolverDesc* articulationListStart;
+	PxU32 articulationListSize;
+	const PxSolverConstraintDesc* constraintList;
+	const PxConstraintBatchHeader* constraintBatchHeaders;
+	PxU32 numConstraintHeaders;
+	const PxU32* headersPerPartition;	// PT: only used by the multi-threaded solver
+	PxU32 nbPartitions;	// PT: only used by the multi-threaded solver
+	Cm::SpatialVector* motionVelocityArray;
+	PxU32 batchSize;	// PT: only used by the multi-threaded solver
+	PxsRigidBody** rigidBodies;	// PT: not really needed by the solvers themselves
+
+	//Shared state progress counters
+	PxI32 constraintIndex;
+	PxI32 constraintIndexCompleted;
+	PxI32 bodyListIndex;
+	PxI32 bodyListIndexCompleted;
+	PxI32 articSolveIndex;
+	PxI32 articSolveIndexCompleted;
+	PxI32 bodyIntegrationListIndex;
+	PxI32 numObjectsIntegrated;
+
+	PxReal dt;
+	PxReal invDt;
+
+	//Write-back threshold information
+	ThresholdStreamElement* thresholdStream;
+	PxU32 thresholdStreamLength;
+
+	PxI32* outThresholdPairs;
+
+	PxU32 mMaxArticulationLinks;	// PT: not really needed by the solvers themselves
+	Cm::SpatialVectorF* deltaV;		// PT: only used by the single-threaded solver for temporarily storing velocities during propagation
+	Dy::ErrorAccumulatorEx* errorAccumulator; //only used by the single-threaded solver
+};
+
+void solveNoContactsCase(	PxU32 bodyListSize, const PxSolverBody* PX_RESTRICT bodyListStart, Cm::SpatialVector* PX_RESTRICT motionVelocityArray,
+							PxU32 articulationListSize, ArticulationSolverDesc* PX_RESTRICT articulationListStart, Cm::SpatialVectorF* PX_RESTRICT deltaV,
+							PxU32 positionIterations, PxU32 velocityIterations, PxF32 dt, PxF32 invDt, bool residualReportingActive);
+
+void saveMotionVelocities(PxU32 nbBodies, const PxSolverBody* PX_RESTRICT solverBodies, Cm::SpatialVector* PX_RESTRICT motionVelocityArray);
+
+class BatchIterator
+{
+	PX_NOCOPY(BatchIterator)
+public:
+	const PxConstraintBatchHeader* mConstraintBatchHeaders;
+	const PxU32 mSize;
+	PxU32 mCurrentIndex;
+
+	BatchIterator(const PxConstraintBatchHeader* constraintBatchHeaders, PxU32 size) : mConstraintBatchHeaders(constraintBatchHeaders),
+		mSize(size), mCurrentIndex(0)
+	{
+	}
+
+	PX_FORCE_INLINE const PxConstraintBatchHeader& GetCurrentHeader(const PxU32 constraintIndex)
+	{
+		PxU32 currentIndex = mCurrentIndex;
+		while((constraintIndex - mConstraintBatchHeaders[currentIndex].startIndex) >= mConstraintBatchHeaders[currentIndex].stride)
+			currentIndex = (currentIndex + 1)%mSize;
+		PxPrefetchLine(&mConstraintBatchHeaders[currentIndex], 128);
+		mCurrentIndex = currentIndex;
+		return mConstraintBatchHeaders[currentIndex];
+	}
+};
+
+inline void SolveBlockParallel(	const PxSolverConstraintDesc* PX_RESTRICT constraintList, PxI32 batchCount, PxI32 index,
+								const PxI32 headerCount, SolverContext& cache, BatchIterator& iterator,
+								SolveBlockMethod solveTable[],
+								PxI32 iteration)
+{
+	const PxI32 indA = index - (iteration * headerCount);
+
+	const PxConstraintBatchHeader* PX_RESTRICT headers = iterator.mConstraintBatchHeaders;
+
+	const PxI32 endIndex = indA + batchCount;
+	for(PxI32 i = indA; i < endIndex; ++i)
+	{
+		PX_ASSERT(i < PxI32(iterator.mSize));
+		const PxConstraintBatchHeader& header = headers[i];
+
+		const PxI32 numToGrab = header.stride;
+		const PxSolverConstraintDesc* PX_RESTRICT block = &constraintList[header.startIndex];
+
+		// PT: TODO: revisit this one
+		PxPrefetch(block[0].constraint, 384);
+
+		for(PxI32 b = 0; b < numToGrab; ++b)
+		{
+			PxPrefetchLine(block[b].bodyA);
+			PxPrefetchLine(block[b].bodyB);
+		}
+
+		//OK. We have a number of constraints to run...
+		solveTable[header.constraintType](block, PxU32(numToGrab), cache);
+	}
+}
+
+}
+
+}
+
+#endif
--- a/engine/third_party/physx/source/lowleveldynamics/src/DySolverExt.h
+++ b/engine/third_party/physx/source/lowleveldynamics/src/DySolverExt.h
@@ -0,0 +1,93 @@
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+//  * Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//  * Redistributions in binary form must reproduce the above copyright
+//    notice, this list of conditions and the following disclaimer in the
+//    documentation and/or other materials provided with the distribution.
+//  * Neither the name of NVIDIA CORPORATION nor the names of its
+//    contributors may be used to endorse or promote products derived
+//    from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.  
+
+#ifndef DY_SOLVER_EXT_H
+#define DY_SOLVER_EXT_H
+
+#include "foundation/PxVec3.h"
+#include "foundation/PxTransform.h"
+#include "CmSpatialVector.h"
+#include "foundation/PxVecMath.h"
+
+namespace physx
+{
+
+class PxsRigidBody;
+struct PxsBodyCore;
+struct PxSolverBody;
+struct PxSolverBodyData;
+
+
+namespace Dy
+{
+
+
+class FeatherstoneArticulation;
+struct SolverConstraint1D;
+
+class SolverExtBody
+{
+public:
+	union
+	{
+		const FeatherstoneArticulation* mArticulation;
+		const PxSolverBody* mBody;
+	};
+	const PxSolverBodyData* mBodyData;
+
+	PxU32 mLinkIndex;
+
+	SolverExtBody(const void* bodyOrArticulationOrSoftBody, const void* bodyData, PxU32 linkIndex): 
+	  mBody(reinterpret_cast<const PxSolverBody*>(bodyOrArticulationOrSoftBody)),
+	  mBodyData(reinterpret_cast<const PxSolverBodyData*>(bodyData)),
+		  mLinkIndex(linkIndex)
+	  {}
+
+	  void getResponse(const PxVec3& linImpulse, const PxVec3& angImpulse,
+					   PxVec3& linDeltaV, PxVec3& angDeltaV, PxReal dominance) const;
+
+	  void getResponse(const aos::Vec3V& linImpulse, const aos::Vec3V& angImpulse,
+		  aos::Vec3V& linDeltaV, aos::Vec3V& angDeltaV, aos::FloatV dominance) const;
+
+	  PxReal projectVelocity(const PxVec3& linear, const PxVec3& angular) const;
+	  aos::FloatV projectVelocity(const aos::Vec3V& linear, const aos::Vec3V& angular) const;
+	  PxVec3 getLinVel() const;
+	  PxVec3 getAngVel() const;
+
+	  aos::Vec3V getLinVelV() const;
+	  aos::Vec3V getAngVelV() const;
+
+	  Cm::SpatialVectorV getVelocity() const;
+	  PxReal getCFM() const;
+};
+
+}
+
+}
+
+#endif
--- a/engine/third_party/physx/source/lowleveldynamics/src/DyTGS.h
+++ b/engine/third_party/physx/source/lowleveldynamics/src/DyTGS.h
@@ -0,0 +1,74 @@
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+//  * Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//  * Redistributions in binary form must reproduce the above copyright
+//    notice, this list of conditions and the following disclaimer in the
+//    documentation and/or other materials provided with the distribution.
+//  * Neither the name of NVIDIA CORPORATION nor the names of its
+//    contributors may be used to endorse or promote products derived
+//    from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.  
+
+#ifndef DY_TGS_H
+#define DY_TGS_H
+
+#include "foundation/PxPreprocessor.h"
+
+namespace physx
+{
+	struct PxConstraintBatchHeader;
+	struct PxSolverConstraintDesc;
+	struct PxTGSSolverBodyTxInertia;
+	struct PxTGSSolverBodyData;
+	struct PxTGSSolverBodyVel;
+
+	namespace Dy
+	{
+		struct SolverContext;
+
+		// PT: using defines like we did in Gu (GU_OVERLAP_FUNC_PARAMS, etc). Additionally this gives a
+		// convenient way to find the TGS solver methods, which are scattered in different files and use
+		// the same function names as other functions (with a different signature).
+
+		#define DY_TGS_SOLVE_METHOD_PARAMS		const PxConstraintBatchHeader& hdr, const PxSolverConstraintDesc* desc, const PxTGSSolverBodyTxInertia* const txInertias, PxReal minPenetration, PxReal elapsedTime, SolverContext& cache
+		#define DY_TGS_CONCLUDE_METHOD_PARAMS	const PxConstraintBatchHeader& hdr, const PxSolverConstraintDesc* desc, const PxTGSSolverBodyTxInertia* const txInertias, PxReal elapsedTime, SolverContext& cache
+		#define DY_TGS_WRITEBACK_METHOD_PARAMS	const PxConstraintBatchHeader& hdr, const PxSolverConstraintDesc* desc, SolverContext* cache
+
+		typedef void (*TGSSolveBlockMethod)		(DY_TGS_SOLVE_METHOD_PARAMS);
+		typedef void (*TGSSolveConcludeMethod)	(DY_TGS_CONCLUDE_METHOD_PARAMS);
+		typedef void (*TGSWriteBackMethod)		(DY_TGS_WRITEBACK_METHOD_PARAMS);
+		
+		extern TGSSolveBlockMethod		g_SolveTGSMethods[];
+		extern TGSSolveConcludeMethod	g_SolveConcludeTGSMethods[];
+		extern TGSWriteBackMethod		g_WritebackTGSMethods[];
+
+		// PT: also used by immediate mode
+		void copyToSolverBodyDataStep(const PxVec3& linearVelocity, const PxVec3& angularVelocity, PxReal invMass, const PxVec3& invInertia, const PxTransform& globalPose,
+			PxReal maxDepenetrationVelocity, PxReal maxContactImpulse, PxU32 nodeIndex, PxReal reportThreshold,
+			PxReal maxAngVelSq, PxU32 lockFlags, bool isKinematic,
+			PxTGSSolverBodyVel& solverVel, PxTGSSolverBodyTxInertia& solverBodyTxInertia, PxTGSSolverBodyData& solverBodyData,
+			PxReal dt, bool gyroscopicForces);
+
+		// PT: also used by immediate mode
+		void integrateCoreStep(PxTGSSolverBodyVel& vel, PxTGSSolverBodyTxInertia& txInertia, PxF32 dt);
+	}
+}
+
+#endif
--- a/engine/third_party/physx/source/lowleveldynamics/src/DyTGSContactPrep.cpp
+++ b/engine/third_party/physx/source/lowleveldynamics/src/DyTGSContactPrep.cpp
--- a/engine/third_party/physx/source/lowleveldynamics/src/DyTGSContactPrep.h
+++ b/engine/third_party/physx/source/lowleveldynamics/src/DyTGSContactPrep.h
@@ -0,0 +1,126 @@
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+//  * Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//  * Redistributions in binary form must reproduce the above copyright
+//    notice, this list of conditions and the following disclaimer in the
+//    documentation and/or other materials provided with the distribution.
+//  * Neither the name of NVIDIA CORPORATION nor the names of its
+//    contributors may be used to endorse or promote products derived
+//    from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.  
+
+#ifndef DY_TGS_CONTACT_PREP_H
+#define DY_TGS_CONTACT_PREP_H
+
+#include "foundation/PxPreprocessor.h"
+#include "DySolverConstraintDesc.h"
+#include "PxSceneDesc.h"
+#include "DySolverContact4.h"
+
+namespace physx
+{
+	struct PxsContactManagerOutput;
+	struct PxSolverConstraintDesc;
+
+	namespace Dy
+	{
+		class ThreadContext;
+		struct CorrelationBuffer;
+
+		bool createFinalizeSolverContactsStep(PxTGSSolverContactDesc& contactDesc,
+			PxsContactManagerOutput& output,
+			ThreadContext& threadContext,
+			const PxReal invDtF32,
+			const PxReal invTotalDtF32,
+			const PxReal totalDtF32,
+			const PxReal stepDt,
+			const PxReal bounceThresholdF32,
+			const PxReal frictionOffsetThreshold,
+			const PxReal correlationDistance,
+			const PxReal biasCoefficient,
+			PxConstraintAllocator& constraintAllocator);
+
+		bool createFinalizeSolverContactsStep(
+			PxTGSSolverContactDesc& contactDesc,
+			CorrelationBuffer& c,
+			const PxReal invDtF32,
+			const PxReal invTotalDtF32,
+			const PxReal totalDtF32,
+			const PxReal dtF32,
+			const PxReal bounceThresholdF32,
+			const PxReal frictionOffsetThreshold,
+			const PxReal correlationDistance,
+			const PxReal biasCoefficient,
+			PxConstraintAllocator& constraintAllocator);
+
+		SolverConstraintPrepState::Enum setupSolverConstraintStep4
+		(PxTGSSolverConstraintPrepDesc* PX_RESTRICT constraintDescs,
+			const PxReal dt, const PxReal totalDt, const PxReal recipdt, const PxReal recipTotalDt, PxU32& totalRows,
+			PxConstraintAllocator& allocator, PxU32 maxRows,
+			const PxReal lengthScale, const PxReal biasCoefficient, bool isResidualReportingEnabled);
+
+		PxU32 SetupSolverConstraintStep(SolverConstraintShaderPrepDesc& shaderDesc,
+			PxTGSSolverConstraintPrepDesc& prepDesc,
+			PxConstraintAllocator& allocator,
+			const PxReal dt, const PxReal totalDt, const PxReal invdt, const PxReal invTotalDt,
+			const PxReal lengthScale, const PxReal biasCoefficient);
+
+		PxU32 setupSolverConstraintStep(
+			const PxTGSSolverConstraintPrepDesc& prepDesc,
+			PxConstraintAllocator& allocator,
+			const PxReal dt, const PxReal totalDt, const PxReal invdt, const PxReal invTotalDt,
+			const PxReal lengthScale, const PxReal biasCoefficient);
+
+		SolverConstraintPrepState::Enum setupSolverConstraintStep4
+		(SolverConstraintShaderPrepDesc* PX_RESTRICT constraintShaderDescs,
+			PxTGSSolverConstraintPrepDesc* PX_RESTRICT constraintDescs,
+			const PxReal dt, const PxReal totalDt, const PxReal recipdt, const PxReal recipTotalDt, PxU32& totalRows,
+			PxConstraintAllocator& allocator, const PxReal lengthScale, const PxReal biasCoefficient, bool isResidualReportingEnabled);
+
+		SolverConstraintPrepState::Enum createFinalizeSolverContacts4Step(
+			PxsContactManagerOutput** cmOutputs,
+			ThreadContext& threadContext,
+			PxTGSSolverContactDesc* blockDescs,
+			const PxReal invDtF32,
+			const PxReal totalDtF32,
+			const PxReal invTotalDtF32,
+			const PxReal dt,
+			const PxReal bounceThresholdF32,
+			const PxReal frictionOffsetThreshold,
+			const PxReal correlationDistance,
+			const PxReal biasCoefficient,
+			PxConstraintAllocator& constraintAllocator);
+
+		SolverConstraintPrepState::Enum createFinalizeSolverContacts4Step(
+			Dy::CorrelationBuffer& c,
+			PxTGSSolverContactDesc* blockDescs,
+			const PxReal invDtF32,
+			const PxReal totalDt,
+			const PxReal invTotalDtF32,
+			const PxReal dt,
+			const PxReal bounceThresholdF32,
+			const PxReal frictionOffsetThreshold,
+			const PxReal correlationDistance,
+			const PxReal biasCoefficient,
+			PxConstraintAllocator& constraintAllocator);
+	}
+}
+
+#endif
--- a/engine/third_party/physx/source/lowleveldynamics/src/DyTGSContactPrepBlock.cpp
+++ b/engine/third_party/physx/source/lowleveldynamics/src/DyTGSContactPrepBlock.cpp
--- a/engine/third_party/physx/source/lowleveldynamics/src/DyTGSDynamics.cpp
+++ b/engine/third_party/physx/source/lowleveldynamics/src/DyTGSDynamics.cpp
--- a/engine/third_party/physx/source/lowleveldynamics/src/DyTGSDynamics.h
+++ b/engine/third_party/physx/source/lowleveldynamics/src/DyTGSDynamics.h
@@ -0,0 +1,325 @@
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+//  * Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//  * Redistributions in binary form must reproduce the above copyright
+//    notice, this list of conditions and the following disclaimer in the
+//    documentation and/or other materials provided with the distribution.
+//  * Neither the name of NVIDIA CORPORATION nor the names of its
+//    contributors may be used to endorse or promote products derived
+//    from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.  
+
+#ifndef DY_TGS_DYNAMICS_H
+#define DY_TGS_DYNAMICS_H
+
+#include "DyDynamicsBase.h"
+#include "PxvConfig.h"
+#include "CmSpatialVector.h"
+#include "CmTask.h"
+#include "CmPool.h"
+#include "PxcThreadCoherentCache.h"
+#include "PxcConstraintBlockStream.h"
+#include "DySolverBody.h"
+#include "PxsIslandSim.h"
+
+namespace physx
+{
+	class PxsRigidBody;
+
+	struct PxsBodyCore;
+	class PxsIslandIndices;
+	struct PxsIndexedInteraction;
+	struct PxsIndexedContactManager;
+	struct PxsExternalAccelerationProvider;
+
+	namespace Dy
+	{
+		struct SolverContext;
+
+		// PT: TODO: what is const and what is not?
+		struct SolverIslandObjectsStep
+		{
+			PxsRigidBody**				bodies;
+			FeatherstoneArticulation**	articulations;
+			FeatherstoneArticulation**	articulationOwners;
+			PxsIndexedContactManager*	contactManagers;	// PT: points to DynamicsTGSContext::mContactList
+
+			const IG::IslandId*			islandIds;
+			PxU32						numIslands;
+			PxU32*						bodyRemapTable;
+			PxU32*						nodeIndexArray;
+
+			PxSolverConstraintDesc*		constraintDescs;
+			PxSolverConstraintDesc*		orderedConstraintDescs;
+			PxSolverConstraintDesc*		tempConstraintDescs;
+			PxConstraintBatchHeader*	constraintBatchHeaders;
+			Cm::SpatialVector*			motionVelocities;
+			PxsBodyCore**				bodyCoreArray;
+
+			PxU32						solverBodyOffset;
+			PxsExternalAccelerationProvider* externalAccelerations;
+
+			SolverIslandObjectsStep() : bodies(NULL), articulations(NULL), articulationOwners(NULL),
+				contactManagers(NULL), islandIds(NULL), numIslands(0), nodeIndexArray(NULL), constraintDescs(NULL), motionVelocities(NULL), bodyCoreArray(NULL),
+				solverBodyOffset(0), externalAccelerations(NULL)
+			{
+			}
+		};
+
+		struct IslandContextStep
+		{
+			//The thread context for this island (set in in the island start task, released in the island end task)
+			ThreadContext*		mThreadContext;
+			PxsIslandIndices	mCounts;
+			SolverIslandObjectsStep mObjects;
+			PxU32				mPosIters;
+			PxU32				mVelIters;
+			PxU32				mArticulationOffset;
+			PxReal				mStepDt;
+			PxReal				mInvStepDt;
+			PxReal				mBiasCoefficient;
+			PxI32				mSharedSolverIndex;
+			PxI32				mSolvedCount;
+			PxI32				mSharedRigidBodyIndex;
+			PxI32				mRigidBodyIntegratedCount;
+			PxI32				mSharedArticulationIndex;
+			PxI32				mArticulationIntegratedCount;
+			PxI32				mSharedGravityIndex;
+			PxI32				mGravityIntegratedCount;
+		};
+
+		class SolverBodyVelDataPool : public PxArray<PxTGSSolverBodyVel, PxAlignedAllocator<128, PxReflectionAllocator<PxTGSSolverBodyVel> > >
+		{
+			PX_NOCOPY(SolverBodyVelDataPool)
+		public:
+			SolverBodyVelDataPool() {}
+		};
+
+		class SolverBodyTxInertiaPool : public PxArray<PxTGSSolverBodyTxInertia, PxAlignedAllocator<128, PxReflectionAllocator<PxTGSSolverBodyTxInertia> > >
+		{
+			PX_NOCOPY(SolverBodyTxInertiaPool)
+		public:
+			SolverBodyTxInertiaPool() {}
+		};
+
+		class SolverBodyDataStepPool : public PxArray<PxTGSSolverBodyData, PxAlignedAllocator<128, PxReflectionAllocator<PxTGSSolverBodyData> > >
+		{
+			PX_NOCOPY(SolverBodyDataStepPool)
+		public:
+			SolverBodyDataStepPool() {}
+		};
+
+		class SolverStepConstraintDescPool : public PxArray<PxSolverConstraintDesc, PxAlignedAllocator<128, PxReflectionAllocator<PxSolverConstraintDesc> > >
+		{
+			PX_NOCOPY(SolverStepConstraintDescPool)
+		public:
+			SolverStepConstraintDescPool() { }
+		};
+
+#if PX_VC 
+#pragma warning(push)
+#pragma warning( disable : 4324 ) // Padding was added at the end of a structure because of a __declspec(align) value.
+#endif
+
+class DynamicsTGSContext : public DynamicsContextBase
+{
+	PX_NOCOPY(DynamicsTGSContext)
+public:
+									DynamicsTGSContext(PxcNpMemBlockPool* memBlockPool,
+										PxcScratchAllocator& scratchAllocator,
+										Cm::FlushPool& taskPool,
+										PxvSimStats& simStats,
+										PxTaskManager* taskManager,
+										PxVirtualAllocatorCallback* allocatorCallback,
+										PxsMaterialManager* materialManager,
+										IG::SimpleIslandManager& islandManager,
+										PxU64 contextID,
+										bool enableStabilization,
+										bool useEnhancedDeterminism,
+										PxReal lengthScale,
+										bool isExternalForcesEveryTgsIterationEnabled,
+										bool isResidualReportingEnabled
+										);
+
+	virtual								~DynamicsTGSContext();
+
+	// Context
+	virtual	void						destroy()	PX_OVERRIDE;
+	virtual void						update(	Cm::FlushPool& flushPool, PxBaseTask* continuation, PxBaseTask* postPartitioningTask, PxBaseTask* lostTouchTask,
+												PxvNphaseImplementationContext* nphase, PxU32 maxPatchesPerCM, PxU32 maxArticulationLinks, PxReal dt, const PxVec3& gravity, PxBitMapPinned& changedHandleMap)	PX_OVERRIDE;
+	virtual void						mergeResults()	PX_OVERRIDE;
+	virtual void						setSimulationController(PxsSimulationController* simulationController)	PX_OVERRIDE	{ mSimulationController = simulationController; }
+	virtual PxSolverType::Enum			getSolverType()	const	PX_OVERRIDE	{ return PxSolverType::eTGS;	}
+		
+	//~Context
+
+					void				updatePostKinematic(IG::SimpleIslandManager& simpleIslandManager, PxBaseTask* continuation, PxBaseTask* lostTouchTask, PxU32 maxLinks);
+protected:
+
+	// PT: TODO: the thread stats are missing for TGS
+/*#if PX_ENABLE_SIM_STATS
+					void			addThreadStats(const ThreadContext::ThreadSimStats& stats);
+#else
+					PX_CATCH_UNDEFINED_ENABLE_SIM_STATS
+#endif*/
+
+			// Solver helper-methods
+			/**
+			\brief Computes the unconstrained velocity for a given PxsRigidBody
+			\param[in] atom The PxsRigidBody
+			*/
+			void								computeUnconstrainedVelocity(PxsRigidBody* atom)	const;
+
+			void								setDescFromIndices_Contacts(PxSolverConstraintDesc& desc, const IG::IslandSim& islandSim,
+																			const PxsIndexedInteraction& constraint, PxU32 solverBodyOffset, PxTGSSolverBodyVel* solverBodies);
+
+			void								setDescFromIndices_Constraints(	PxSolverConstraintDesc& desc, const IG::IslandSim& islandSim, IG::EdgeIndex edgeIndex,
+																				const PxU32* bodyRemapTable, PxU32 solverBodyOffset, PxTGSSolverBodyVel* solverBodies);
+
+			void solveIsland(const SolverIslandObjectsStep& objects,
+				const PxsIslandIndices& counts,
+				PxU32 solverBodyOffset,
+				IG::SimpleIslandManager& islandManager,
+				PxU32* bodyRemapTable, PxsMaterialManager* materialManager,
+				PxsContactManagerOutputIterator& iterator,
+				PxBaseTask* continuation);
+
+			void prepareBodiesAndConstraints(const SolverIslandObjectsStep& objects,
+				IG::SimpleIslandManager& islandManager,
+				IslandContextStep& islandContext);
+
+			void setupDescs(IslandContextStep& islandContext, const SolverIslandObjectsStep& objects, PxU32* mBodyRemapTable, PxU32 mSolverBodyOffset,
+				PxsContactManagerOutputIterator& outputs);
+
+			void preIntegrateBodies(PxsBodyCore** bodyArray, PxsRigidBody** originalBodyArray,
+				PxTGSSolverBodyVel* solverBodyVelPool, PxTGSSolverBodyTxInertia* solverBodyTxInertia, PxTGSSolverBodyData* solverBodyDataPool2,
+				PxU32* nodeIndexArray, PxU32 bodyCount, const PxVec3& gravity, PxReal dt, PxU32& posIters, PxU32& velIters, PxU32 iteration);
+
+			void setupArticulations(IslandContextStep& islandContext, const PxVec3& gravity, PxReal dt, PxU32& posIters, PxU32& velIters, PxBaseTask* continuation);
+
+			PxU32 setupArticulationInternalConstraints(IslandContextStep& islandContext, PxReal dt, PxReal invStepDt);
+
+			void createSolverConstraints(PxSolverConstraintDesc* contactDescPtr, PxConstraintBatchHeader* headers, PxU32 nbHeaders,
+				PxsContactManagerOutputIterator& outputs, Dy::ThreadContext& islandThreadContext, Dy::ThreadContext& threadContext, PxReal stepDt, PxReal totalDt, 
+				PxReal invStepDt, PxReal biasCoefficient);
+
+			void solveConstraintsIteration(const PxSolverConstraintDesc* const contactDescPtr, const PxConstraintBatchHeader* const batchHeaders, PxU32 nbHeaders, PxReal invStepDt,
+				const PxTGSSolverBodyTxInertia* const solverTxInertia, PxReal elapsedTime, PxReal minPenetration, SolverContext& cache);
+
+			template <bool TSync>
+			void solveConcludeConstraintsIteration(const PxSolverConstraintDesc* const contactDescPtr, const PxConstraintBatchHeader* const batchHeaders, PxU32 nbHeaders,
+				PxTGSSolverBodyTxInertia* solverTxInertia, PxReal elapsedTime, SolverContext& cache, PxU32 iterCount);
+
+			template <bool Sync>
+			void parallelSolveConstraints(const PxSolverConstraintDesc* const contactDescPtr, const PxConstraintBatchHeader* const batchHeaders, PxU32 nbHeaders, PxTGSSolverBodyTxInertia* solverTxInertia,
+				PxReal elapsedTime, PxReal minPenetration, SolverContext& cache, PxU32 iterCount);
+
+			void writebackConstraintsIteration(const PxConstraintBatchHeader* const hdrs, const PxSolverConstraintDesc* const contactDescPtr, PxU32 nbHeaders, SolverContext& cache);
+
+			void parallelWritebackConstraintsIteration(const PxSolverConstraintDesc* const contactDescPtr, const PxConstraintBatchHeader* const batchHeaders, PxU32 nbHeaders, SolverContext& cache);
+
+			void applySubstepGravity(PxsRigidBody** bodies, PxsExternalAccelerationProvider& externalAccelerations,
+				PxU32 count, PxTGSSolverBodyVel* vels, PxReal dt, PxTGSSolverBodyTxInertia* PX_RESTRICT txInertias, PxU32* nodeIndexArray);
+
+			void applySubstepGravityParallel(const SolverIslandObjectsStep& objects, PxTGSSolverBodyVel* solverVels, const PxU32 bodyOffset, PxReal stepDt,
+				const PxU32 nbBodies, PxU32& startGravityIdx, PxU32& nbGravityRemaining, PxU32& targetGravityProgressCount,
+				PxI32* gravityProgressCount, PxI32* gravityCounts, PxU32 unrollSize);
+
+			void applyArticulationSubstepGravityParallel(PxU32& startArticulationIdx, PxU32& targetArticulationProgressCount, PxI32* articulationProgressCount,
+				PxI32* articulationIntegrationCounts, ArticulationSolverDesc* articulationDescs, PxU32 nbArticulations, PxReal stepDt, ThreadContext& threadContext);
+
+			void integrateBodies(PxU32 count, PxTGSSolverBodyVel* vels, PxTGSSolverBodyTxInertia* txInertias, PxReal dt);
+
+			void integrateBodiesAndApplyGravity(const SolverIslandObjectsStep& objects, PxU32 count, PxTGSSolverBodyVel* vels,
+				PxTGSSolverBodyTxInertia* txInertias, PxReal dt, PxU32 posIters);
+
+			void parallelIntegrateBodies(PxTGSSolverBodyVel* vels, PxTGSSolverBodyTxInertia* txInertias,
+				PxU32 count, PxReal dt, PxU32 iteration);
+
+			void copyBackBodies(const SolverIslandObjectsStep& objects,
+				PxTGSSolverBodyVel* vels, PxTGSSolverBodyTxInertia* txInertias,
+				PxTGSSolverBodyData* solverBodyData, PxReal invDt,	IG::IslandSim& islandSim,
+				PxU32 startIdx, PxU32 endIdx);
+
+			void updateArticulations(Dy::ThreadContext& threadContext, PxU32 startIdx, PxU32 endIdx, PxReal dt);
+
+			void stepArticulations(Dy::ThreadContext& threadContext, const PxsIslandIndices& counts, PxReal dt, PxReal stepInvDt);
+
+			void applyArticulationTgsSubstepForces(Dy::ThreadContext& threadContext, PxU32 numArticulations, PxReal stepDt);
+
+			void iterativeSolveIsland(const SolverIslandObjectsStep& objects, const PxsIslandIndices& counts, ThreadContext& mThreadContext,
+				PxReal stepDt, PxReal invStepDt, PxReal totalDt, PxU32 posIters, PxU32 velIters, SolverContext& cache, PxReal biasCoefficient);
+
+			void iterativeSolveIslandParallel(const SolverIslandObjectsStep& objects, const PxsIslandIndices& counts, ThreadContext& mThreadContext,
+				PxReal stepDt,  PxReal totalDt, PxU32 posIters, PxU32 velIters, PxI32* solverCounts, PxI32* integrationCounts, PxI32* articulationIntegrationCounts, PxI32* gravityCounts,
+				PxI32* solverProgressCount, PxI32* integrationProgressCount, PxI32* articulationProgressCount, PxI32* gravityProgressCount, PxU32 solverUnrollSize, PxU32 integrationUnrollSize,
+				PxReal biasCoefficient);
+
+			void endIsland(ThreadContext& mThreadContext);
+
+			void finishSolveIsland(ThreadContext& mThreadContext, const SolverIslandObjectsStep& objects,
+				const PxsIslandIndices& counts, IG::SimpleIslandManager& islandManager, PxBaseTask* continuation);
+
+			PxTGSSolverBodyVel				mWorldSolverBodyVel;
+			PxTGSSolverBodyTxInertia		mWorldSolverBodyTxInertia;
+			PxTGSSolverBodyData				mWorldSolverBodyData2;
+
+			/**
+			\brief Solver constraint desc array
+			*/
+			SolverStepConstraintDescPool	mSolverConstraintDescPool;
+
+			SolverStepConstraintDescPool	mOrderedSolverConstraintDescPool;
+
+			SolverStepConstraintDescPool	mTempSolverConstraintDescPool;
+
+			SolverBodyVelDataPool			mSolverBodyVelPool;
+
+			SolverBodyTxInertiaPool			mSolverBodyTxInertiaPool;
+
+			SolverBodyDataStepPool			mSolverBodyDataPool2;			
+
+		private:
+			bool	mIsExternalForcesEveryTgsIterationEnabled;
+
+			friend class SetupDescsTask;
+			friend class PreIntegrateTask;
+			friend class SetupArticulationTask;
+			friend class SetupArticulationInternalConstraintsTask;
+			friend class SetupSolverConstraintsTask;
+			friend class SolveIslandTask;
+			friend class EndIslandTask;
+			friend class SetupSolverConstraintsSubTask;
+			friend class ParallelSolveTask;
+			friend class PreIntegrateParallelTask;
+			friend class CopyBackTask;
+			friend class UpdateArticTask;
+			friend class FinishSolveIslandTask;
+		};
+
+#if PX_VC 
+#pragma warning(pop)
+#endif
+
+	}
+}
+
+#endif
+
--- a/engine/third_party/physx/source/lowleveldynamics/src/DyThreadContext.cpp
+++ b/engine/third_party/physx/source/lowleveldynamics/src/DyThreadContext.cpp
@@ -0,0 +1,113 @@
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+//  * Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//  * Redistributions in binary form must reproduce the above copyright
+//    notice, this list of conditions and the following disclaimer in the
+//    documentation and/or other materials provided with the distribution.
+//  * Neither the name of NVIDIA CORPORATION nor the names of its
+//    contributors may be used to endorse or promote products derived
+//    from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.  
+
+#include "DyThreadContext.h"
+#include "foundation/PxBitUtils.h"
+
+namespace physx
+{
+namespace Dy
+{
+
+ThreadContext::ThreadContext(PxcNpMemBlockPool* memBlockPool) :
+	mFrictionPatchStreamPair				(*memBlockPool),
+	mConstraintBlockManager					(*memBlockPool),
+	mConstraintBlockStream					(*memBlockPool),
+	mNumDifferentBodyConstraints			(0),
+	mNumStaticConstraints					(0),
+	mHasOverflowPartitions					(false),
+	mConstraintsPerPartition				("ThreadContext::mConstraintsPerPartition"),
+	//mPartitionNormalizationBitmap			("ThreadContext::mPartitionNormalizationBitmap"),
+	mBodyCoreArray							(NULL),
+	mRigidBodyArray							(NULL),
+	mArticulationArray						(NULL),
+	motionVelocityArray						(NULL),
+	bodyRemapTable							(NULL),
+	mNodeIndexArray							(NULL),
+	contactConstraintDescArray				(NULL),
+	contactDescArraySize					(0),
+	orderedContactConstraints				(NULL),
+	contactConstraintBatchHeaders			(NULL),
+	numContactConstraintBatches				(0),
+	tempConstraintDescArray					(NULL),
+#if PGS_SUPPORT_COMPOUND_CONSTRAINTS
+	compoundConstraints						("ThreadContext::compoundConstraints"),
+	orderedContactList						("ThreadContext::orderedContactList"),
+	tempContactList							("ThreadContext::tempContactList"),
+	sortIndexArray							("ThreadContext::sortIndexArray"),
+#endif
+	mOrderedContactDescCount				(0),
+	mOrderedFrictionDescCount				(0),
+	mConstraintSize							(0),
+	mAxisConstraintCount					(0),
+	mMaxPartitions							(0),
+	mMaxFrictionPartitions					(0),
+	mMaxSolverPositionIterations			(0),
+	mMaxSolverVelocityIterations			(0),
+	mMaxArticulationLinks					(0),
+	mContactDescPtr							(NULL),
+	mArticulations							("ThreadContext::articulations")
+{
+#if PX_ENABLE_SIM_STATS
+	mThreadSimStats.clear();
+#else
+	PX_CATCH_UNDEFINED_ENABLE_SIM_STATS
+#endif
+	//Defaulted to have space for 16384 bodies
+	//mPartitionNormalizationBitmap.reserve(512); 
+	//Defaulted to have space for 128 partitions (should be more-than-enough)
+	mConstraintsPerPartition.reserve(128);
+}
+
+void ThreadContext::resizeArrays(PxU32 articulationCount)
+{
+	mArticulations.forceSize_Unsafe(0);
+	mArticulations.reserve(PxMax<PxU32>(PxNextPowerOfTwo(articulationCount), 16));
+	mArticulations.forceSize_Unsafe(articulationCount);
+
+	mContactDescPtr = contactConstraintDescArray;
+}
+
+void ThreadContext::reset()
+{
+	// TODO: move these to the PxcNpThreadContext
+	mFrictionPatchStreamPair.reset();
+	mConstraintBlockStream.reset();
+
+	mContactDescPtr = contactConstraintDescArray;
+
+	mAxisConstraintCount = 0;
+	mMaxSolverPositionIterations = 0;
+	mMaxSolverVelocityIterations = 0;
+	mNumDifferentBodyConstraints = 0;
+	mNumStaticConstraints = 0;
+	mConstraintSize = 0;
+}
+
+}
+} 
--- a/engine/third_party/physx/source/lowleveldynamics/src/DyThreadContext.h
+++ b/engine/third_party/physx/source/lowleveldynamics/src/DyThreadContext.h
@@ -0,0 +1,188 @@
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+//  * Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//  * Redistributions in binary form must reproduce the above copyright
+//    notice, this list of conditions and the following disclaimer in the
+//    documentation and/or other materials provided with the distribution.
+//  * Neither the name of NVIDIA CORPORATION nor the names of its
+//    contributors may be used to endorse or promote products derived
+//    from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.  
+
+#ifndef DY_THREAD_CONTEXT_H
+#define DY_THREAD_CONTEXT_H
+
+#include "foundation/PxTransform.h"
+#include "geomutils/PxContactBuffer.h"
+
+#include "PxvConfig.h"
+#include "PxvDynamics.h"
+#include "PxcThreadCoherentCache.h"
+#include "PxcConstraintBlockStream.h"
+#include "foundation/PxBitMap.h"
+#include "DyVArticulation.h"
+#include "DyFrictionPatchStreamPair.h"
+#include "DySolverConstraintDesc.h"
+#include "DyCorrelationBuffer.h"
+#include "foundation/PxAllocator.h"
+#include "DyResidualAccumulator.h"
+
+namespace physx
+{
+struct PxsIndexedContactManager;
+class PxsRigidBody;
+
+namespace Dy
+{
+
+/*!
+Cache information specific to the software implementation(non common).
+
+See PxcgetThreadContext.
+
+Not thread-safe, so remember to have one object per thread!
+
+TODO! refactor this and rename(it is a general per thread cache). Move transform cache into its own class.
+*/
+class ThreadContext : public PxcThreadCoherentCache<ThreadContext, PxcNpMemBlockPool>::EntryBase
+{
+	PX_NOCOPY(ThreadContext)
+public:
+
+#if PX_ENABLE_SIM_STATS
+	struct ThreadSimStats
+	{
+		void clear()
+		{
+			numActiveConstraints = 0;
+			numActiveDynamicBodies = 0;
+			numActiveKinematicBodies = 0;
+			numAxisSolverConstraints = 0;
+		}
+
+		PxU32 numActiveConstraints;
+		PxU32 numActiveDynamicBodies;
+		PxU32 numActiveKinematicBodies;
+		PxU32 numAxisSolverConstraints;
+
+		Dy::ErrorAccumulatorEx contactErrorAccumulator;
+	};
+#else
+	PX_CATCH_UNDEFINED_ENABLE_SIM_STATS
+#endif
+
+	//TODO: tune cache size based on number of active objects.
+	ThreadContext(PxcNpMemBlockPool* memBlockPool);
+	void reset();
+	void resizeArrays(PxU32 articulationCount);
+
+	// PT: TODO: is there a reason why everything is public except mArticulations ?
+	PX_FORCE_INLINE	PxArray<ArticulationSolverDesc>&	getArticulations()	{ return mArticulations;	}
+
+#if PX_ENABLE_SIM_STATS
+	PX_FORCE_INLINE ThreadSimStats& getSimStats()
+	{
+		return mThreadSimStats;
+	}
+#else
+	PX_CATCH_UNDEFINED_ENABLE_SIM_STATS
+#endif
+
+	PxContactBuffer								mContactBuffer;
+
+		// temporary buffer for correlation
+	PX_ALIGN(16, CorrelationBuffer				mCorrelationBuffer); 
+
+	FrictionPatchStreamPair						mFrictionPatchStreamPair;	// patch streams
+
+	PxsConstraintBlockManager					mConstraintBlockManager;	// for when this thread context is "lead" on an island
+	PxcConstraintBlockStream 					mConstraintBlockStream;		// constraint block pool
+
+	// this stuff is just used for reformatting the solver data. Hopefully we should have a more
+	// sane format for this when the dust settles - so it's just temporary. If we keep this around
+	// here we should move these from public to private
+
+	PxU32										mNumDifferentBodyConstraints;
+	PxU32										mNumStaticConstraints;
+	bool										mHasOverflowPartitions;
+
+	PxArray<PxU32>								mConstraintsPerPartition;
+	//PxArray<PxU32>								mPartitionNormalizationBitmap;	// PT: for PX_NORMALIZE_PARTITIONS
+	PxsBodyCore**								mBodyCoreArray;
+	PxsRigidBody**								mRigidBodyArray;
+	FeatherstoneArticulation**					mArticulationArray;
+	Cm::SpatialVector*							motionVelocityArray;
+	PxU32*										bodyRemapTable;
+	PxU32*										mNodeIndexArray;
+
+	// PT: TODO: unify names around here, some use "m", some don't
+
+	//Constraint info for normal constraint solver
+	PxSolverConstraintDesc*						contactConstraintDescArray;
+	PxU32										contactDescArraySize;
+	PxSolverConstraintDesc*						orderedContactConstraints;
+	PxConstraintBatchHeader*					contactConstraintBatchHeaders;
+	PxU32										numContactConstraintBatches;
+
+	//Constraint info for partitioning
+	PxSolverConstraintDesc*						tempConstraintDescArray;
+
+#if PGS_SUPPORT_COMPOUND_CONSTRAINTS
+	//Info for tracking compound contact managers (temporary data - could use scratch memory!)
+	PxArray<CompoundContactManager>				compoundConstraints;
+
+	//Used for sorting constraints. Temporary, could use scratch memory
+	PxArray<const PxsIndexedContactManager*>	orderedContactList;
+	PxArray<const PxsIndexedContactManager*>	tempContactList;
+	PxArray<PxU32>								sortIndexArray;
+#endif
+
+	PxArray<Cm::SpatialVectorF>					mZVector; // scratch space, used for propagation during constraint prepping
+	PxArray<Cm::SpatialVectorF>					mDeltaV; // scratch space, used temporarily for propagating velocities
+
+	PxU32										mOrderedContactDescCount;
+	PxU32										mOrderedFrictionDescCount;
+
+	PxU32										mConstraintSize;		// PT: TODO: consider removing, this is only used for stats
+	PxU32										mAxisConstraintCount;	// PT: TODO: consider removing, this is only used for stats
+
+	PxU32										mMaxPartitions;
+	PxU32										mMaxFrictionPartitions;
+	PxU32										mMaxSolverPositionIterations;
+	PxU32										mMaxSolverVelocityIterations;
+	PxU32										mMaxArticulationLinks;
+	
+	PxSolverConstraintDesc*						mContactDescPtr;
+private:
+
+	PxArray<ArticulationSolverDesc>				mArticulations;
+
+#if PX_ENABLE_SIM_STATS
+	ThreadSimStats								mThreadSimStats;
+#else
+	PX_CATCH_UNDEFINED_ENABLE_SIM_STATS
+#endif
+};
+
+}
+
+}
+
+#endif
--- a/engine/third_party/physx/source/lowleveldynamics/src/DyThresholdTable.cpp
+++ b/engine/third_party/physx/source/lowleveldynamics/src/DyThresholdTable.cpp
@@ -0,0 +1,67 @@
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+//  * Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//  * Redistributions in binary form must reproduce the above copyright
+//    notice, this list of conditions and the following disclaimer in the
+//    documentation and/or other materials provided with the distribution.
+//  * Neither the name of NVIDIA CORPORATION nor the names of its
+//    contributors may be used to endorse or promote products derived
+//    from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.  
+
+#include "foundation/PxMemory.h"
+#include "foundation/PxHash.h"
+#include "foundation/PxAllocator.h"
+#include "DyThresholdTable.h"
+#include "foundation/PxUtilities.h"
+
+namespace physx
+{
+	namespace Dy
+	{
+		bool ThresholdTable::check(const ThresholdStream& stream, const PxU32 nodeIndexA, const PxU32 nodeIndexB, PxReal dt)
+		{
+			PxU32* PX_RESTRICT hashes = mHash;
+			PxU32* PX_RESTRICT nextIndices = mNexts;
+			Pair* PX_RESTRICT pairs = mPairs;
+
+			/*const PxsRigidBody* b0 = PxMin(body0, body1);
+			const PxsRigidBody* b1 = PxMax(body0, body1);*/
+
+			const PxU32 nA = PxMin(nodeIndexA, nodeIndexB);
+			const PxU32 nB = PxMax(nodeIndexA, nodeIndexB);
+
+			PxU32 hashKey = computeHashKey(nodeIndexA, nodeIndexB, mHashSize);
+
+			PxU32 pairIndex = hashes[hashKey];
+			while(NO_INDEX != pairIndex)
+			{
+				Pair& pair = pairs[pairIndex];
+				const PxU32 thresholdStreamIndex = pair.thresholdStreamIndex;
+				PX_ASSERT(thresholdStreamIndex < stream.size());
+				const ThresholdStreamElement& otherElement = stream[thresholdStreamIndex];
+				if(otherElement.nodeIndexA.index()==nA && otherElement.nodeIndexB.index()==nB)
+					return (pair.accumulatedForce > (otherElement.threshold * dt));
+				pairIndex = nextIndices[pairIndex];
+			}
+			return false;
+		}
+	}
+}