feat(physics): wire physx sdk into build

This commit is contained in:
2026-04-15 12:22:15 +08:00
parent 5bf258df6d
commit 31f40e2cbb
2044 changed files with 752623 additions and 1 deletions

View File

@@ -0,0 +1,199 @@
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
#ifndef PXG_CONSTRAINT_H
#define PXG_CONSTRAINT_H
#include "PxvConfig.h"
#include "foundation/PxSimpleTypes.h"
#include "foundation/PxVec3.h"
#include "vector_types.h"
namespace physx
{
struct PxgSolverBodyData;
struct PxConstraintInvMassScale;
/**
\brief Per-contact-patch header for the (PGS) GPU contact solver.
Field packing is implied by the names (e.g. invMass0_1_angDom0_1 packs the
inverse masses and angular dominance scales of bodies 0 and 1;
normal_staticFriction packs the contact normal with the static friction
coefficient) — verify against the kernels that write these fields.
*/
struct PxgSolverContactHeader
{
float4 invMass0_1_angDom0_1;
float4 normal_staticFriction;
PxU32 flags;
PxU32 numNormalConstr; // number of normal (contact) constraint rows in this patch
PxU32 forceWritebackOffset; // offset used when writing solver forces back
PxReal accumNormalForce;
};
// Layout is ABI for the GPU solver: the header must stay exactly 48 bytes
// (44 bytes of fields rounded up by the 16-byte float4 alignment).
PX_COMPILE_TIME_ASSERT(sizeof(PxgSolverContactHeader) == 48);
/**
\brief A single articulation contact point for the solver.
The trailing //size offset comments document the running byte layout,
which is pinned by the compile-time assert below.
*/
struct PxgSolverContactPointExt
{
PxVec3 angDeltaVA; //12 12
PxVec3 linDeltaVA; //12 24
PxVec3 angDeltaVB; //12 36
PxVec3 linDeltaVB; //12 48
PxVec3 raXn; //12 60
PxVec3 rbXn; //12 72
PxReal velMultiplier; //4 76
PxReal maxImpulse; //4 80
PxReal biasedErr; //4 84
PxReal unbiasedErr; //4 88
PxReal appliedForce; //4 92
PxU32 padding; //4 96 — explicit pad keeps the struct at exactly 96 bytes
};
PX_COMPILE_TIME_ASSERT(sizeof(PxgSolverContactPointExt) == 96);
/**
\brief Per-patch friction header for the (PGS) GPU contact solver.
Holds the two friction directions (as float4) plus the per-patch
friction row count, dynamic friction coefficient and broken flag.
*/
struct PxgSolverFrictionHeader
{
float4 frictionNormals[2]; // the two tangential friction axes
PxU32 numFrictionConstr; // number of friction rows following this header
PxReal dynamicFriction;
PxU32 broken; // non-zero once the friction patch has broken
};
/**
\brief A single articulation friction constraint for the solver.
16-byte aligned; the trailing //size offset comments document the byte
layout (asserted to be 96 bytes after the struct's first use below).
*/
#if PX_VC
#pragma warning(push)
#pragma warning(disable : 4324) // structure was padded due to alignment specifier — intentional here
#endif
struct PX_ALIGN_PREFIX(16) PxgSolverContactFrictionExt
{
PxVec3 angDeltaVA; //12 12
PxVec3 linDeltaVA; //12 24
PxVec3 angDeltaVB; //12 36
PxVec3 linDeltaVB; //12 48
PxVec3 raXn; //12 60
PxVec3 rbXn; //12 72
PxReal velMultiplier; //4 76
PxReal targetVel; //4 80
PxReal bias; //4 84
PxReal appliedForce; //4 88
PxU32 padding[2]; //8 96 — explicit pad keeps the struct at exactly 96 bytes
} PX_ALIGN_SUFFIX(16);
#if PX_VC
#pragma warning(pop)
#endif
/**
\brief Non-owning bundle of pointers into the (PGS) solver constraint
buffers for one contact batch.
*/
struct PxgContactParams
{
PxgSolverContactHeader* contactHeader;
PxgSolverFrictionHeader* frictionHeader;
PxgSolverContactPointExt* solverContacts;
PxgSolverContactFrictionExt* solverFrictions;
};
// Note: this assert belongs to PxgSolverContactFrictionExt (defined above).
PX_COMPILE_TIME_ASSERT(sizeof(PxgSolverContactFrictionExt) == 96);
/**
\brief Per-contact-patch header for the TGS GPU contact solver.
The trailing //offset comments track the running byte layout, pinned to
64 bytes by the compile-time assert below.
*/
struct PxgTGSSolverContactHeader
{
float4 dom0_1_angDom0_1; //16
float4 normal_maxPenBias; //32
PxReal staticFriction;
PxReal dynamicFriction;
PxReal minNormalForce;
PxU32 flags; //48
PxU16 numNormalConstr; // packed to 16 bits with numFrictionConstr to fit the 64-byte budget
PxU16 numFrictionConstr;
PxU32 forceWritebackOffset;
PxU32 broken;
PxU32 pad; //64 — explicit pad to the asserted size
};
PX_COMPILE_TIME_ASSERT(sizeof(PxgTGSSolverContactHeader) == 64);
/**
\brief A single articulation contact point for the TGS solver.
Members are ordered to enable wide coalesced loads (see inline comments);
do not reorder without checking the loading code.
*/
struct PxgTGSSolverContactPointExt
{
//Grouped together in contiguous memory so we can load all 48 bytes in a single instruction
PxVec3 angDeltaVA; //12 12
PxVec3 linDeltaVA; //12 24
PxVec3 angDeltaVB; //12 36
PxVec3 linDeltaVB; //12 48
//Grouped so we can load 24 bytes in single instruction
PxVec3 raXn; //12 60
PxVec3 rbXn; //12 72
//All the loose items - loaded incoherently
PxReal separation; //4 76
PxReal velMultiplier; //4 80
PxReal targetVelocity; //4 84
PxReal biasCoefficient; //4 88
PxReal maxImpulse; //4 92
PxReal appliedForce; //4 96
};
/**
\brief A single articulation friction constraint for the TGS solver.
Members are ordered to enable wide coalesced loads (see inline comments);
do not reorder without checking the loading code.
*/
struct PxgTGSSolverFrictionExt
{
//Grouped together in contiguous memory so we can load all 48 bytes in a single instruction
PxVec3 angDeltaVA; //12 12
PxVec3 linDeltaVA; //12 24
PxVec3 angDeltaVB; //12 36
PxVec3 linDeltaVB; //12 48
//Grouped so we can load 24 bytes in single instruction
PxVec3 raXn; //12 60
PxVec3 rbXn; //12 72
//Loose items - loaded incoherently
PxVec3 normal; //12 84
PxReal error; //4 88
PxReal targetVel; //4 92
PxReal velMultiplier; //4 96
PxReal biasScale; //4 100
PxReal frictionScale; //4 104
PxReal appliedForce; //4 108
PxU32 pad; //4 112 — explicit pad to keep the struct a multiple of 16 bytes
};
/**
\brief Non-owning bundle of pointers into the TGS solver constraint
buffers for one contact batch. Unlike PxgContactParams there is no
separate friction header pointer here.
*/
struct PxgTGSContactParams
{
PxgTGSSolverContactHeader* contactHeader;
PxgTGSSolverContactPointExt* solverContacts;
PxgTGSSolverFrictionExt* solverFrictions;
};
}
#endif

View File

@@ -0,0 +1,237 @@
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
#ifndef PXG_CONSTRAINT_BLOCK_H
#define PXG_CONSTRAINT_BLOCK_H
#include "PxvConfig.h"
#include "foundation/PxSimpleTypes.h"
#include "foundation/PxVec3.h"
#include "PxgSolverBody.h"
namespace physx
{
#if PX_VC
#pragma warning(push)
#pragma warning(disable : 4324)
#endif
/**
\brief Batched (structure-of-arrays) PGS contact header: each member is a
32-wide array, one lane per constraint in the batch, aligned to 128 bytes
for coalesced GPU access.
*/
struct PxgBlockSolverContactHeader
{
PX_ALIGN(128, float4 invMass0_1_angDom0_1[32]); //512 512
PX_ALIGN(128, float4 normal_staticFriction[32]); //1024 512
PX_ALIGN(128, PxReal accumNormalForce[32]);
//Only used by articulation constraints. Forces the minimum normal force for friction.
//Without this, articulations can drift due to no normal force when multi-link systems contact with surfaces.
PX_ALIGN(128, PxReal minNormalForce[32]);
PX_ALIGN(128, PxU32 flags[32]); //1152 128
PX_ALIGN(128, PxU32 numNormalConstr[32]); //1280 128
PX_ALIGN(128, PxU32 forceWritebackOffset[32]); //1408 128
// To use different mass for mass-splitting every sub-timestep (or iteration),
// recipResponse, velMultipler, biasCoefficient, etc. are computed every sub-timestep (or iteration).
// To compute them every sub-timestep (or iteration), restitution and cfm are additionally stored.
// This does not change the previous impulse formulation, but a different mass is used due to mass-splitting.
PX_ALIGN(128, PxReal restitution[32]);
PX_ALIGN(128, PxReal cfm[32]);
};
/**
\brief Batched (SoA) PGS friction header: 32-wide arrays, one lane per
constraint in the batch, 128-byte aligned for coalesced GPU access.
*/
struct PxgBlockSolverFrictionHeader
{
PX_ALIGN(128, float4 frictionNormals[2][32]); //1024 1024
PX_ALIGN(128, PxU32 numFrictionConstr[32]); //1152 128
PX_ALIGN(128, PxReal dynamicFriction[32]); //1280 128
PX_ALIGN(128, PxU32 broken[32]); //1408 128
};
// Historical size check, disabled since the header grew extra fields:
//PX_COMPILE_TIME_ASSERT(sizeof(PxgBlockSolverContactHeader) == 1280);
/**
\brief A single rigid body contact point for the solver, in batched (SoA)
form: 32-wide arrays, one lane per constraint in the batch.
*/
struct PxgBlockSolverContactPoint
{
// To use different mass for mass-splitting every sub-timestep (or iteration),
// unitResponse, recipResponse, velMultiplier, etc. are computed every sub-timestep (or iteration).
// To compute them at every sub-timestep (or iteration), resp0, resp1, and other relevant data are stored additionally.
// This does not change the previous impulse formulation, but a different mass is used due to mass-splitting.
PX_ALIGN(128, float4 raXn_targetVelocity[32]); // xyz = ra x n, w = target velocity
PX_ALIGN(128, float4 rbXn_maxImpulse[32]); // xyz = rb x n, w = max impulse
PX_ALIGN(128, PxReal appliedForce[32]);
PX_ALIGN(128, PxReal resp0[32]);
PX_ALIGN(128, PxReal resp1[32]);
// Two coefficients used in "queryReducedCompliantContactCoefficients" and "computeCompliantContactCoefficients"
PX_ALIGN(128, PxReal coeff0[32]);
PX_ALIGN(128, PxReal coeff1[32]);
};
/**
\brief Batched articulation response vectors, stored fully scalarized
(one 32-wide array per vector component) for coalesced per-lane access:
linear/angular delta responses for bodies A and B.
*/
struct PxgArticulationBlockResponse
{
PX_ALIGN(128, float deltaRALin_x[32]);
PX_ALIGN(128, float deltaRALin_y[32]);
PX_ALIGN(128, float deltaRALin_z[32]);
PX_ALIGN(128, float deltaRAAng_x[32]);
PX_ALIGN(128, float deltaRAAng_y[32]);
PX_ALIGN(128, float deltaRAAng_z[32]);
PX_ALIGN(128, float deltaRBLin_x[32]);
PX_ALIGN(128, float deltaRBLin_y[32]);
PX_ALIGN(128, float deltaRBLin_z[32]);
PX_ALIGN(128, float deltaRBAng_x[32]);
PX_ALIGN(128, float deltaRBAng_y[32]);
PX_ALIGN(128, float deltaRBAng_z[32]);
};
/**
\brief A single friction constraint for the solver, in batched (SoA) form:
32-wide arrays, one lane per constraint in the batch.
*/
struct PxgBlockSolverContactFriction
{
// To use different mass for mass-splitting every sub-timestep (or iteration),
// unitResponse, recipResponse, velMultiplier, etc. are computed every sub-timestep (or iteration).
// To compute them at every sub-timestep (or iteration), resp0, resp1, and other relevant data are stored additionally.
// This does not change the previous impulse formulation, but a different mass is used due to mass-splitting.
PX_ALIGN(128, float4 raXn_bias[32]); // xyz = ra x n, w = bias
PX_ALIGN(128, float4 rbXn_targetVelW[32]); // xyz = rb x n, w = target velocity
PX_ALIGN(128, PxReal appliedForce[32]);
PX_ALIGN(128, PxReal resp0[32]);
PX_ALIGN(128, PxReal resp1[32]);
};
/**
\brief Batched (SoA) TGS contact header: 32-wide arrays, one lane per
constraint in the batch, 128-byte aligned for coalesced GPU access.
*/
struct PxgTGSBlockSolverContactHeader
{
PX_ALIGN(128, float4 invMass0_1_angDom0_1[32]); //512 512
PX_ALIGN(128, float4 normal_staticFriction[32]); //1024 512
//Only used by articulation constraints. Forces the minimum normal force for friction.
//Without this, articulations can drift due to no normal force when multi-link systems contact with surfaces.
PX_ALIGN(128, PxReal minNormalForce[32]);
PX_ALIGN(128, PxF32 maxPenBias[32]); //1152 128
PX_ALIGN(128, PxU32 flags[32]); //1408 128
PX_ALIGN(128, PxU32 numNormalConstr[32]); //1536 128
PX_ALIGN(128, PxU32 forceWritebackOffset[32]); //1664 128
// To use different mass for mass-splitting every sub-timestep (or iteration),
// recipResponse, velMultipler, biasCoefficient, etc. are computed every sub-timestep (or iteration).
// To compute them every sub-timestep (or iteration), restitution, cfm, and p8 are additionally stored.
// This does not change the previous impulse formulation, but a different mass is used due to mass-splitting.
PX_ALIGN(128, PxReal restitutionXdt[32]);
PX_ALIGN(128, PxReal cfm[32]);
PX_ALIGN(128, PxReal p8[32]);
};
/**
\brief Batched (SoA) TGS friction header: 32-wide arrays, one lane per
constraint in the batch, 128-byte aligned for coalesced GPU access.
*/
struct PxgTGSBlockSolverFrictionHeader
{
PX_ALIGN(128, float4 frictionNormals[2][32]); //1024 1024
PX_ALIGN(128, PxU32 numFrictionConstr[32]); //1152 128
PX_ALIGN(128, PxReal dynamicFriction[32]); //1280 128
PX_ALIGN(128, PxU32 broken[32]); //1408 128
PX_ALIGN(128, PxReal biasCoefficient[32]);
PX_ALIGN(128, PxReal torsionalFrictionScale[32]);
};
// Stale copy-pasted size check (it names PxgBlockSolverContactHeader,
// not this struct); kept disabled:
//PX_COMPILE_TIME_ASSERT(sizeof(PxgBlockSolverContactHeader) == 1280);
/**
\brief A single rigid body contact point for the TGS solver, in batched
(SoA) form: 32-wide arrays, one lane per constraint in the batch.
*/
struct PxgTGSBlockSolverContactPoint
{
// To use different mass for mass-splitting every sub-timestep (or iteration),
// unitResponse, recipResponse, velMultiplier, etc. are computed every sub-timestep (or iteration).
// To compute them at every sub-timestep (or iteration), resp0, resp1, and other relevant data are stored additionally.
// This does not change the previous impulse formulation, but a different mass is used due to mass-splitting.
PX_ALIGN(128, float4 raXn_extraCoeff[32]); // For contact constraints, extraCoeff is the compliant contact coefficient "a"
// used in "computeCompliantContactCoefficientsTGS".
PX_ALIGN(128, float4 rbXn_targetVelW[32]); // xyz = rb x n, w = target velocity
PX_ALIGN(128, PxReal separation[32]);
PX_ALIGN(128, PxReal maxImpulse[32]);
PX_ALIGN(128, PxReal appliedForce[32]);
PX_ALIGN(128, PxReal biasCoefficient[32]);
PX_ALIGN(128, PxReal resp0[32]);
PX_ALIGN(128, PxReal resp1[32]);
};
/**
\brief A single friction constraint for the TGS solver, in batched (SoA)
form: 32-wide arrays, one lane per constraint in the batch.
*/
struct PxgTGSBlockSolverContactFriction
{
// To use different mass for mass-splitting every sub-timestep (or iteration),
// unitResponse, recipResponse, velMultiplier, etc. are computed every sub-timestep (or iteration).
// To compute them every sub-timestep (or iteration), resp0 and resp1 are stored separately.
// This does not change the previous impulse formulation, but a different mass is used due to mass-splitting.
PX_ALIGN(128, float4 raXn_error[32]); // xyz = ra x n, w = error term
PX_ALIGN(128, float4 rbXn_targetVelW[32]); // xyz = rb x n, w = target velocity
PX_ALIGN(128, PxReal appliedForce[32]);
PX_ALIGN(128, PxReal resp0[32]);
PX_ALIGN(128, PxReal resp1[32]);
};
/**
\brief Non-owning bundle of pointers into the batched (SoA) PGS solver
constraint buffers for one contact batch.
*/
struct PxgContactBlockParams
{
PxgBlockSolverContactHeader* blockContactHeader;
PxgBlockSolverFrictionHeader* blockFrictionHeader;
PxgBlockSolverContactPoint* blockContactPoints;
PxgBlockSolverContactFriction* blockFrictions;
};
/**
\brief Non-owning bundle of pointers into the batched (SoA) TGS solver
constraint buffers for one contact batch.
*/
struct PxgTGSContactBlockParams
{
PxgTGSBlockSolverContactHeader* blockContactHeader;
PxgTGSBlockSolverFrictionHeader* blockFrictionHeader;
PxgTGSBlockSolverContactPoint* blockContactPoints;
PxgTGSBlockSolverContactFriction* blockFrictions;
};
#if PX_VC
#pragma warning(pop)
#endif
}
#endif

View File

@@ -0,0 +1,314 @@
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
#ifndef PXG_CONSTRAINT_HELPER_H
#define PXG_CONSTRAINT_HELPER_H
#include "PxgD6JointLimit.h"
#include "foundation/PxQuat.h"
#include "foundation/PxMathUtils.h"
#include "CmConeLimitHelper.h"
// PT: TODO: refactor/share remaining code. One reason for the duplication is that the CPU code uses
// SIMD here and there, while the GPU code doesn't. But we could still merge the two eventually.
namespace physx
{
// PT: TODO: this is a duplicate of the one in Extensions, but less robust?
PX_CUDA_CALLABLE PX_INLINE void computeJacobianAxes(PxVec3 row[3], const PxQuat& qa, const PxQuat& qb)
{
// Builds the 3 angular jacobian rows for the relative rotation (qa* qb)
// [* means conjugate]: d/dt (qa* qb) = 1/2 L(qa*) R(qb) (omega_b - omega_a),
// so the rows are those of L(qa*) R(qb), where L(q)/R(q) are the left/right
// quaternion multiplication matrices.
// NOTE: expression structure deliberately mirrors the reference formulation
// so float results stay bit-identical.
const PxReal scalarA = qa.w;
const PxReal scalarB = qb.w;
const PxVec3 imagA(qa.x,qa.y,qa.z), imagB(qb.x,qb.y,qb.z);
const PxVec3 mixed = imagB*scalarA + imagA*scalarB; // w_a*v_b + w_b*v_a
const PxReal diag = scalarA*scalarB - imagA.dot(imagB); // scalar part of qa* qb
row[0] = (imagA * imagB.x + imagB * imagA.x + PxVec3(diag, mixed.z, -mixed.y)) * 0.5f;
row[1] = (imagA * imagB.y + imagB * imagA.y + PxVec3(-mixed.z, diag, mixed.x)) * 0.5f;
row[2] = (imagA * imagB.z + imagB * imagA.z + PxVec3(mixed.y, -mixed.x, diag)) * 0.5f;
}
// Computes the world-space joint frames cA2w/cB2w by transforming the
// joint's local constraint frames (data.c2b[0/1]) by the respective body
// transforms bA2w/bB2w.
PX_INLINE PX_CUDA_CALLABLE void computeJointFrames(PxTransform& cA2w, PxTransform& cB2w, const PxgJointData& data, const PxTransform& bA2w, const PxTransform& bB2w)
{
PX_ASSERT(bA2w.isValid() && bB2w.isValid());
cA2w = bA2w.transform(data.c2b[0]);
cB2w = bB2w.transform(data.c2b[1]);
PX_ASSERT(cA2w.isValid() && cB2w.isValid());
}
// Helper that fills in Px1DConstraint rows (linear/angular, hard/limited/driven)
// for the GPU joint prep. Mirrors the CPU Extensions joint helper (see TODO above).
class PxgConstraintHelper
{
PxVec3 mRa, mRb; // lever arms from body A / body B to the constraint anchor
public:
PX_CUDA_CALLABLE PxgConstraintHelper(/*Px1DConstraint* c,*/ const PxVec3& ra, const PxVec3& rb)
: /*mConstraints(c), mCurrent(c),*/ mRa(ra), mRb(rb) {}
// Computes the joint frames and derives both lever arms from them.
// Note both lever arms are measured to cB2w.p (the child constraint frame);
// this matches the CPU D6 joint, which resolves the joint at the child frame.
PX_CUDA_CALLABLE PxgConstraintHelper(
PxTransform& cA2w, PxTransform& cB2w,
const PxgJointData& data, const PxTransform& bA2w, const PxTransform& bB2w)
{
computeJointFrames(cA2w, cB2w, data, bA2w, bB2w);
mRa = cB2w.p - bA2w.p;
mRb = cB2w.p - bB2w.p;
}
// hard linear & angular
// NOTE(review): unlike angularHard, this overload is not PX_CUDA_CALLABLE —
// presumably only called from host code; confirm before adding device callers.
PX_FORCE_INLINE void linearHard(Px1DConstraint* c, const PxVec3& axis, PxReal posErr)
{
linear(c, axis, posErr, PxConstraintSolveHint::eEQUALITY);
c->flags |= Px1DConstraintFlag::eOUTPUT_FORCE;
}
PX_CUDA_CALLABLE PX_FORCE_INLINE void angularHard(Px1DConstraint* c, const PxVec3& axis, PxReal posErr)
{
angular(c, axis, posErr, PxConstraintSolveHint::eEQUALITY);
c->flags |= Px1DConstraintFlag::eOUTPUT_FORCE;
}
// limited linear & angular
// Emits a linear limit row when the limit is hard, or when a soft limit is
// violated (ordinate beyond limitValue). Returns the updated row index.
PX_CUDA_CALLABLE PX_FORCE_INLINE PxU32 linearLimit(Px1DConstraint* c, PxU32 currentIndex, const PxVec3& axis, PxReal ordinate, PxReal limitValue, const PxgJointLimitParameters& limit)
{
if(!limit.isSoft() || ordinate > limitValue)
{
Px1DConstraint* cConstraint = &c[currentIndex++];
linear(cConstraint, axis,limitValue - ordinate, PxConstraintSolveHint::eNONE);
addLimit(cConstraint ,limit);
}
return currentIndex;
}
// Emits an angular limit row when within 'pad' of the limit (pad is ignored
// for soft limits). Returns the updated row index.
PX_FORCE_INLINE PxU32 angularLimit(Px1DConstraint* c, PxU32 currentIndex, const PxVec3& axis, PxReal ordinate, PxReal limitValue, PxReal pad, const PxgJointLimitParameters& limit)
{
if(limit.isSoft())
pad = 0;
if(ordinate + pad > limitValue)
{
Px1DConstraint* cConstraint = &c[currentIndex++];
angular(cConstraint, axis,limitValue - ordinate, PxConstraintSolveHint::eNONE);
addLimit(cConstraint,limit);
}
return currentIndex;
}
// Unconditionally emits an angular limit row with the given geometric error.
PX_CUDA_CALLABLE PX_FORCE_INLINE void angularLimit(Px1DConstraint* c, const PxVec3& axis, PxReal error, const PxgJointLimitParameters& limit)const
{
angular(c, axis,error, PxConstraintSolveHint::eNONE);
addLimit(c,limit);
}
// Emits up to two angular limit rows for a lower/upper limit pair around
// 'angle' (hard limits always emit both rows; soft limits only when violated).
PX_CUDA_CALLABLE PX_FORCE_INLINE PxU32 anglePair(Px1DConstraint* c, PxU32 currentIndex, PxReal angle, PxReal lower, PxReal upper, const PxVec3& axis, const PxgJointLimitParameters& limit)const
{
PX_ASSERT(lower<upper);
const bool softLimit = limit.isSoft();
if (!softLimit || angle < lower)
angularLimit(&c[currentIndex++], -axis, -(lower - angle), limit);
if (!softLimit || angle > upper)
angularLimit(&c[currentIndex++], axis, (upper - angle), limit);
return currentIndex;
}
// driven linear & angular
PX_CUDA_CALLABLE PX_FORCE_INLINE void linear(Px1DConstraint* c, const PxVec3& axis, PxReal velTarget, PxReal error, const PxgD6JointDrive& drive)const
{
linear(c, axis,error,PxConstraintSolveHint::eNONE);
addDrive(c,velTarget,drive);
}
PX_CUDA_CALLABLE PX_FORCE_INLINE void angular(Px1DConstraint* c, const PxVec3& axis, PxReal velTarget, PxReal error, const PxgD6JointDrive& drive, PxConstraintSolveHint::Enum hint = PxConstraintSolveHint::eNONE)const
{
angular(c, axis,error,hint);
addDrive(c,velTarget,drive);
}
//PX_CUDA_CALLABLE PX_FORCE_INLINE PxU32 getCount() { return PxU32(mCurrent - mConstraints); }
// Emits hard equality rows for the locked linear/angular axes selected by the
// 'lin' / 'ang' bitmasks (bit i = axis i). cB2cAp is the child anchor position
// expressed in the parent constraint frame. The lever arm for body A is shifted
// by the accumulated linear error (projection trick shared with the CPU path);
// the adjusted lever arm is returned via raOut. Returns the updated row index.
PX_CUDA_CALLABLE PxU32 prepareLockedAxes(Px1DConstraint* c, PxU32 currentIndex, const PxQuat& qA, const PxQuat& qB, const PxVec3& cB2cAp, PxU32 lin, PxU32 ang,
PxVec3& raOut)
{
//Px1DConstraint* current = mCurrent;
//const PxU32 startIndex = currentIndex;
PxVec3 errorVector(0.f);
PxVec3 ra = mRa;
if(lin)
{
PxMat33 axes(qA);
if (lin & 1) errorVector -= axes.column0 * cB2cAp.x;
if (lin & 2) errorVector -= axes.column1 * cB2cAp.y;
if (lin & 4) errorVector -= axes.column2 * cB2cAp.z;
ra += errorVector;
if(lin&1) linear(&c[currentIndex++], axes[0], ra, mRb, -cB2cAp[0], PxConstraintSolveHint::eEQUALITY, Px1DConstraintFlag::eOUTPUT_FORCE);
if(lin&2) linear(&c[currentIndex++], axes[1], ra, mRb, -cB2cAp[1], PxConstraintSolveHint::eEQUALITY, Px1DConstraintFlag::eOUTPUT_FORCE);
if(lin&4) linear(&c[currentIndex++], axes[2], ra, mRb, -cB2cAp[2], PxConstraintSolveHint::eEQUALITY, Px1DConstraintFlag::eOUTPUT_FORCE);
}
if (ang)
{
PxQuat qB2qA = qA.getConjugate() * qB;
/*if (qB2qA.w<0)
qB2qA = -qB2qA;*/
PxVec3 row[3];
computeJacobianAxes(row, qA, qB);
// Geometric error per angular axis is the (negated) imaginary part of the
// relative rotation qA* qB.
PxVec3 imp = qB2qA.getImaginaryPart();
if (ang & 1) angular(&c[currentIndex++], row[0], -imp.x, PxConstraintSolveHint::eEQUALITY, Px1DConstraintFlag::eOUTPUT_FORCE);
if (ang & 2) angular(&c[currentIndex++], row[1], -imp.y, PxConstraintSolveHint::eEQUALITY, Px1DConstraintFlag::eOUTPUT_FORCE);
if (ang & 4) angular(&c[currentIndex++], row[2], -imp.z, PxConstraintSolveHint::eEQUALITY, Px1DConstraintFlag::eOUTPUT_FORCE);
}
raOut = ra;
return currentIndex;
}
private:
// Fills a linear row using the cached lever arms (mRa/mRb); resets flags,
// impulse bounds, spring params and velocity target to defaults.
PX_CUDA_CALLABLE PX_FORCE_INLINE void linear(Px1DConstraint* c, const PxVec3& axis, PxReal posErr, PxConstraintSolveHint::Enum hint)const
{
c->solveHint = PxU16(hint);
c->linear0 = axis; c->angular0 = mRa.cross(axis);
c->linear1 = axis; c->angular1 = mRb.cross(axis);
PX_ASSERT(c->linear0.isFinite());
PX_ASSERT(c->linear1.isFinite());
PX_ASSERT(c->angular0.isFinite());
PX_ASSERT(c->angular1.isFinite());
c->geometricError = posErr;
c->flags = 0;
c->minImpulse = -PX_MAX_REAL;
c->maxImpulse = PX_MAX_REAL;
c->mods.spring.damping = 0.f;
c->mods.spring.stiffness = 0.f;
c->velocityTarget =0.f;
}
// Same as above but with explicit lever arms and initial flags (used by
// prepareLockedAxes, where ra carries the accumulated linear error).
PX_CUDA_CALLABLE PX_FORCE_INLINE void linear(Px1DConstraint* c, const PxVec3& axis, const PxVec3& ra, const PxVec3& rb, PxReal posErr, PxConstraintSolveHint::Enum hint,
PxU32 flags = 0)const
{
c->solveHint = PxU16(hint);
c->linear0 = axis; c->angular0 = ra.cross(axis);
c->linear1 = axis; c->angular1 = rb.cross(axis);
PX_ASSERT(c->linear0.isFinite());
PX_ASSERT(c->linear1.isFinite());
PX_ASSERT(c->angular0.isFinite());
PX_ASSERT(c->angular1.isFinite());
c->geometricError = posErr;
c->flags = flags;
c->minImpulse = -PX_MAX_REAL;
c->maxImpulse = PX_MAX_REAL;
c->mods.spring.damping = 0.f;
c->mods.spring.stiffness = 0.f;
c->velocityTarget = 0.f;
}
// Fills an angular row (zero linear jacobian, axis as angular jacobian) and
// always tags it with eANGULAR_CONSTRAINT.
PX_CUDA_CALLABLE PX_FORCE_INLINE void angular(Px1DConstraint* c, const PxVec3& axis, PxReal posErr, PxConstraintSolveHint::Enum hint,
PxU32 flags = 0)const
{
c->solveHint = PxU16(hint);
c->linear0 = PxVec3(0); c->angular0 = axis;
c->linear1 = PxVec3(0); c->angular1 = axis;
c->geometricError = posErr;
c->flags = flags | Px1DConstraintFlag::eANGULAR_CONSTRAINT;
c->minImpulse = -PX_MAX_REAL;
c->maxImpulse = PX_MAX_REAL;
c->mods.spring.damping = 0.f;
c->mods.spring.stiffness = 0.f;
c->velocityTarget = 0.f;
}
// Converts an already-filled row into a limit row: soft limits become springs,
// hard limits become inequality rows with restitution/keep-bias flags.
// Limits are one-sided, hence minImpulse = 0.
PX_CUDA_CALLABLE void addLimit(Px1DConstraint* c, const PxgJointLimitParameters& limit)const
{
PxU16 flags = PxU16(c->flags | Px1DConstraintFlag::eOUTPUT_FORCE);
if(limit.isSoft())
{
flags |= Px1DConstraintFlag::eSPRING;
c->mods.spring.stiffness = limit.stiffness;
c->mods.spring.damping = limit.damping;
}
else
{
c->solveHint = PxConstraintSolveHint::eINEQUALITY;
c->mods.bounce.restitution = limit.restitution;
c->mods.bounce.velocityThreshold = limit.bounceThreshold;
if(c->geometricError>0)
flags |= Px1DConstraintFlag::eKEEPBIAS;
if(limit.restitution>0)
flags |= Px1DConstraintFlag::eRESTITUTION;
}
c->flags = flags;
c->minImpulse = 0;
}
// Converts an already-filled row into a drive row: spring with the drive's
// stiffness/damping, force-limited impulse bounds, and the given velocity target.
PX_CUDA_CALLABLE void addDrive(Px1DConstraint* c, PxReal velTarget, const PxgD6JointDrive& drive)const
{
c->velocityTarget = velTarget;
PxU16 flags = PxU16(c->flags | Px1DConstraintFlag::eSPRING | Px1DConstraintFlag::eHAS_DRIVE_LIMIT);
if(drive.flags & PxgD6JointDriveFlag::eACCELERATION)
flags |= Px1DConstraintFlag::eACCELERATION_SPRING;
if (drive.flags & PxgD6JointDriveFlag::eOUTPUT_FORCE)
flags |= Px1DConstraintFlag::eOUTPUT_FORCE;
c->flags = flags;
c->mods.spring.stiffness = drive.stiffness;
c->mods.spring.damping = drive.damping;
c->minImpulse = -drive.forceLimit;
c->maxImpulse = drive.forceLimit;
//PX_ASSERT(c->linear0.isFinite());
//PX_ASSERT(c->angular0.isFinite());
}
};
}
#endif

View File

@@ -0,0 +1,455 @@
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
#ifndef PXG_CONSTRAINT_PARTITION_H
#define PXG_CONSTRAINT_PARTITION_H
#define PX_PARTITION_COMPACTION 1
#include "foundation/PxPinnedArray.h"
#include "foundation/PxSList.h"
#include "foundation/PxUserAllocated.h"
#include "foundation/PxUtilities.h"
#include "PxgSolverBody.h"
#include "PxgSolverConstraintDesc.h"
#include "PxsSimpleIslandManager.h"
#include "PxgDynamicsConfiguration.h"
#include "PxgEdgeType.h"
#include "PxgPartitionNode.h"
#include "PxsPartitionEdge.h"
namespace physx
{
class PxsContactManagerOutputIterator;
class PxgBodySimManager;
class PxgJointManager;
struct PxsContactManagerOutputCounts;
namespace Cm
{
class FlushPool;
}
#define SLAB_SIZE 512
// PT: defines controlling a large dense array of 32 pointers per node using up a lot of memory.
//
// STORE_INDICES_IN_NODE_ENTRIES stores indices instead of PartitionEdge ptrs in NodeEntries.
// Half the memory usage compared to initial version but more expensive address computation
// and we still decode and use a pointer in the end.
//
// STORE_EDGE_DATA_IN_NODE_ENTRIES stores the edge data directly instead of indices.
// One less indirection but same memory usage as initial version.
//
// Initial version | Less indirection | Better Mem usage
// ---------------------------------|-------------------|---------------------
// Initial version | No | No
// STORE_INDICES_IN_NODE_ENTRIES | No | Yes
// STORE_EDGE_DATA_IN_NODE_ENTRIES | Yes | No
//
// Jury is still out regarding which one is best for perf. Meanwhile we use the one with best mem usage.
#define STORE_INDICES_IN_NODE_ENTRIES 1
#if STORE_INDICES_IN_NODE_ENTRIES
#define STORE_EDGE_DATA_IN_NODE_ENTRIES 0
#if STORE_EDGE_DATA_IN_NODE_ENTRIES
// Variant: store the needed edge data (unique index + node0 index) inline.
struct EdgeData
{
PxU32 mUniqueIndex;
PxU32 mNode0Index;
};
typedef EdgeData NodeEntryStorage;
typedef EdgeData NodeEntryDecoded;
PX_FORCE_INLINE void resetNodeEntryStorage(EdgeData& edge) { edge.mUniqueIndex = IG_INVALID_EDGE; }
PX_FORCE_INLINE PxU32 getUniqueId(const EdgeData& edge) { return edge.mUniqueIndex; }
PX_FORCE_INLINE PxU32 getNode0Index(const EdgeData& edge) { return edge.mNode0Index; }
#else
// Active variant: store a PxU32 edge index, decode to a PartitionEdge* on use.
// Half the memory of storing pointers (see table above).
typedef PxU32 NodeEntryStorage;
typedef const PartitionEdge* NodeEntryDecoded;
PX_FORCE_INLINE void resetNodeEntryStorage(PxU32& edge) { edge = IG_INVALID_EDGE; }
PX_FORCE_INLINE PxU32 getUniqueId(const PartitionEdge* edge) { return edge->mUniqueIndex; }
PX_FORCE_INLINE PxU32 getNode0Index(const PartitionEdge* edge) { return edge->mNode0.index(); }
#endif
#else
// Initial variant: store raw PartitionEdge pointers directly.
typedef const PartitionEdge* NodeEntryStorage;
typedef const PartitionEdge* NodeEntryDecoded;
PX_FORCE_INLINE void resetNodeEntryStorage(const PartitionEdge*& edge) { edge = NULL; }
PX_FORCE_INLINE PxU32 getUniqueId(const PartitionEdge* edge) { return edge->mUniqueIndex; }
PX_FORCE_INLINE PxU32 getNode0Index(const PartitionEdge* edge) { return edge->mNode0.index(); }
#endif
typedef Cm::BlockArray<PxU32> PartitionIndices;
//typedef PxArray<PxU32> PartitionIndices;
// A fixed-size slab of SLAB_SIZE PartitionEdges, pool-allocated by
// PartitionEdgeManager.
struct PartitionEdgeSlab
{
PartitionEdge mEdges[SLAB_SIZE]; //! The slabs
};
// Slab-based pool allocator for PartitionEdges. Edges are recycled through a
// free list (mFreeEdges) and can be looked up by unique id, which encodes
// slab index / offset (unique ids are assumed to be assigned contiguously,
// SLAB_SIZE per slab — see getPartitionEdge).
class PartitionEdgeManager
{
PartitionEdge* mFreeEdges; // intrusive free list of recycled edges
PxArray<PartitionEdgeSlab*> mPartitionEdgeSlabs;
PxArray<void*> mMemory; // raw allocations backing the slabs
PxU32 mEdgeCount;
PX_NOINLINE void allocateSlab(); // grows the pool by one slab; out-of-line to keep getEdge small
public:
PartitionEdgeManager();
~PartitionEdgeManager();
PX_FORCE_INLINE PartitionEdge* getEdge(IG::EdgeIndex index);
PX_FORCE_INLINE void putEdge(PartitionEdge* edge);
// Note: relies on SLAB_SIZE being a power of two ('& (SLAB_SIZE - 1)').
PX_FORCE_INLINE const PartitionEdge* getPartitionEdge(PxU32 uniqueId) const
{
return &mPartitionEdgeSlabs[uniqueId / SLAB_SIZE]->mEdges[uniqueId & (SLAB_SIZE - 1)];
}
PX_FORCE_INLINE PxU32 getEdgeCount() const { return mEdgeCount; }
};
// One constraint partition: per-edge-type lists of edge unique indices.
// Each edge records its position in its list (mPartitionEntryIndex) so it
// can be removed in O(1) with a swap-with-last.
struct Partition
{
PartitionIndices mPartitionIndices[PxgEdgeType::eEDGE_TYPE_COUNT];
Partition() {}
//Adds an edge to the partition
bool addToPartition(PxU32 uniqueIndex, PartitionIndexData& indexData)
{
PartitionIndices& indices = mPartitionIndices[indexData.mCType];
// Record where the edge lands so removeFromPartition can swap-remove it.
indexData.mPartitionEntryIndex = indices.size();
indices.pushBack(uniqueIndex);
return true;
}
// O(1) swap-with-last removal; patches the moved edge's stored entry index.
void removeFromPartition(PxU32 uniqueIndex, PxPinnedArray<PartitionIndexData>& iterator)
{
const PartitionIndexData& indexData = iterator[uniqueIndex];
PartitionIndices& indices = mPartitionIndices[indexData.mCType];
// AD: defensive coding for OM-90842. If this assert hits, you maybe hit the same issue
PxU32 size = indices.size();
if (size == 0)
{
PxGetFoundation().error(PxErrorCode::eINTERNAL_ERROR, PX_FL, "PxgConstraintPartition: attempting to remove an edge from an empty partition. Skipping.");
PX_ASSERT(false);
return;
}
size--;
const PxU32 uniqueIdx = indices[size]; // last element, moved into the removed slot
const PxU32 partitionEntryIndex = indexData.mPartitionEntryIndex;
iterator[uniqueIdx].mPartitionEntryIndex = partitionEntryIndex;
indices[partitionEntryIndex] = uniqueIdx;
indices.forceSize_Unsafe(size);
}
};
// Per-node array of edge entries, one per partition in a slab
// (PXG_BATCH_SIZE entries; storage format selected by the
// STORE_*_IN_NODE_ENTRIES defines above).
struct NodeEntries
{
NodeEntryStorage mEdges[PXG_BATCH_SIZE];
};
// A batch of PXG_BATCH_SIZE partitions plus the per-node bookkeeping used to
// decide quickly which of these partitions a node already appears in.
struct PartitionSlab : public PxUserAllocated
{
	Partition mPartitions[PXG_BATCH_SIZE];	//! Each slab has 32 partitions
	PxArray<PxU32> mNodeBitmap;				//! One integer per node: bit i records the node's presence in partition i of this slab
	PxArray<NodeEntries> mNodeEntries;		// per-node edge entries, one NodeEntries (32 slots) per node
	PartitionSlab() {}
};
// Flat list of partition pointers plus running partition counts, used when
// combining slabs into the final solver ordering.
struct PartitionArray : public PxUserAllocated
{
	PxArray<Partition*> mPartitions;			// pointers into PartitionSlab::mPartitions; clear() drops them without deleting
	PxU32 mAccumulatedPartitionCount;			// running rigid-body partition total — TODO confirm exact meaning
	PxU32 mAccumulatedArtiPartitionCount;		// running articulation partition total — TODO confirm exact meaning
	// Pre-sizes the partition list to 1024 entries up front.
	PartitionArray() : mPartitions(1024), mAccumulatedPartitionCount(0), mAccumulatedArtiPartitionCount(0)
	{
	}
	// Resets counters and empties the list. forceSize_Unsafe keeps the
	// existing capacity so per-frame reuse does not reallocate.
	void clear()
	{
		mPartitions.forceSize_Unsafe(0);
		mAccumulatedPartitionCount = 0;
		mAccumulatedArtiPartitionCount = 0;
	}
};
// Aggregated view over up to 32 PartitionArrays, tracking the combined
// partition count and the maximum partition limits.
class PxgCombinedPartitionSlab : public PxUserAllocated
{
public:
	PxU32 mNbPartitions;				// number of partitions currently in use (reset by clear())
	const PxU32 mUserNbMaxPartitions;	// partition limit requested by the user (immutable)
	PxU32 mNbMaxPartitions;				// working partition limit, initialized from the user value
	PartitionArray mPartitionArray[32];

	// Fix: mNbPartitions is now zero-initialized here. Previously it was left
	// uninitialized until the first clear(), so getCombinedSlabNbPartitions()
	// could read garbage before then.
	PxgCombinedPartitionSlab(PxU32 maxNumPartitions) : mNbPartitions(0), mUserNbMaxPartitions(maxNumPartitions), mNbMaxPartitions(maxNumPartitions)
	{
	}

	~PxgCombinedPartitionSlab()
	{
	}

	// Resets the combined partition count and clears all 32 sub-arrays.
	void clear()
	{
		mNbPartitions = 0;
		for (PxU32 i = 0; i < 32; ++i)
			mPartitionArray[i].clear();
	}
};
// Incremental constraint partitioning for the GPU solver.
// Maintains per-node / per-partition edge assignments across frames so that
// only changed edges (added/removed contacts and joints) are re-partitioned,
// and exposes the pinned-memory arrays the GPU solver consumes.
class PxgIncrementalPartition
{
	PX_NOCOPY(PxgIncrementalPartition)
public: // PT: TODO: revisit after the dust settles
	PxArray<PartitionSlab*> mPartitionSlabs;	// one slab per PXG_BATCH_SIZE partitions
	PartitionEdgeManager mEdgeManager;			// pool of PartitionEdge objects
	PxU32 mNodeCount;
	// Batch counters, split by constraint kind (contact vs joint) and by
	// rigid-body vs articulation.
	PxU32 mNbContactBatches;
	PxU32 mNbConstraintBatches;
	PxU32 mNbArtiContactBatches;
	PxU32 mNbArtiConstraintBatches;
	PxU32 mNbPartitions;						// see TODO below: relation to mCSlab.mNbPartitions unclear
	// Totals, same split as the batch counters above.
	PxU32 mTotalContacts;
	PxU32 mTotalConstraints;
	PxU32 mTotalArticulationContacts;
	PxU32 mTotalArticulationConstraints;
	PxU32 mMaxSlabCount;
	PxU32 mNbForceThresholds;					// non-zero when force-threshold reporting is needed (see hasForceThresholds())
#if PX_ENABLE_ASSERTS
	// Debug-only accumulated counts used to validate the partitioning.
	PxArray<PxU32> mAccumulatedPartitionCount; // for contact
	PxArray<PxU32> mAccumulatedConstraintCount; // for joint
	PxArray<PxU32> mAccumulatedArtiPartitionCount; //for contact
	PxArray<PxU32> mAccumulatedArtiConstraintCount; // for constraint
#endif
	PxBitMap mIsDirtyNode;						// nodes whose edge set changed this frame — TODO confirm
	PxArray<PxU32> mNpIndexArray;				// narrow-phase indices, exposed via getNpIndexArray()
	// Pinned (page-locked) arrays: uploaded to / read by the GPU solver.
	PxPinnedArray<PartitionIndexData> mPartitionIndexArray;
	PxPinnedArray<PartitionNodeData> mPartitionNodeArray;
	PxPinnedArray<PxgSolverConstraintManagerConstants> mSolverConstants;
	PxInt32ArrayPinned mNodeInteractionCountArray;
	PxInt32ArrayPinned mDestroyedContactEdgeIndices;
	PxInt32ArrayPinned mStartSlabPerPartition;
	PxInt32ArrayPinned mArticStartSlabPerPartition;
	PxInt32ArrayPinned mNbJointsPerPartition;
	PxInt32ArrayPinned mNbArtiJointsPerPartition;
	// Per-partition start offsets for each constraint kind.
	PxArray<PxU32> mJointStartIndices;
	PxArray<PxU32> mContactStartIndices;
	PxArray<PxU32> mArtiContactStartIndices;
	PxArray<PxU32> mArtiJointStartIndices;
	PxgCombinedPartitionSlab mCSlab;			// combined view over the partition slabs
	const PxU64 mContextID;						// profiler context id
public:
	PxgIncrementalPartition(const PxVirtualAllocator& allocator, PxU32 maxNumPartitions, PxU64 contextID);
	~PxgIncrementalPartition();
	// Entry point for processing contact patches gained/lost this frame (multi-threaded via flushPool).
	void processLostFoundPatches( Cm::FlushPool& flushPool, PxBaseTask* continuation,
								IG::IslandSim& islandSim, PxgBodySimManager& bodySimManager, PxgJointManager& jointManager,
								PxsContactManager** lostFoundPatchManagers, PxU32 nbLostFoundPatchManagers, const PxsContactManagerOutputCounts* lostFoundPairOutputs);
	// Entry point for incrementally updating the partitioning from island-sim changes.
	void updateIncrementalIslands( IG::IslandSim& islandSim, const IG::AuxCpuData& islandManagerData,
								Cm::FlushPool* flushPool, PxBaseTask* continuation,
								PxsContactManagerOutputIterator& iterator, PxgBodySimManager& bodySimManager, PxgJointManager& jointManager);
	// PT: internal reference versions, exposed for UTs
	void processLostPatches_Reference(	IG::IslandSim& islandSim, PxgBodySimManager& bodySimManager, PxgJointManager& jointManager,
										PxsContactManager** lostFoundPatchManagers, PxU32 nbLostFoundPatchManagers, const PxsContactManagerOutputCounts* lostFoundPairOutputs);
	void processFoundPatches_Reference(	IG::IslandSim& islandSim, PxgBodySimManager& bodySimManager,
										PxsContactManager** lostFoundPatchManagers, PxU32 nbLostFoundPatchManagers, const PxsContactManagerOutputCounts* lostFoundPairOutputs);
	void updateIncrementalIslands_Reference(IG::IslandSim& islandSim, const IG::AuxCpuData& islandManagerData,
											PxsContactManagerOutputIterator& iterator,
											PxgBodySimManager& bodySimManager, PxgJointManager& jointManager);
	// PT: edge data
	PX_FORCE_INLINE const PxPinnedArray<PxgSolverConstraintManagerConstants>& getSolverConstants() const { return mSolverConstants; }
	// PT: TODO: what's the difference between mNbPartitions and mCSlab.mNbPartitions ?
	PX_FORCE_INLINE PxU32 getNbPartitions() const { return mNbPartitions; }
	PX_FORCE_INLINE PxU32 getCombinedSlabMaxNbPartitions() const { return mCSlab.mNbMaxPartitions; }
	PX_FORCE_INLINE PxU32 getCombinedSlabNbPartitions() const { return mCSlab.mNbPartitions; }
	// Accessors for the pinned/CPU arrays consumed by the GPU solver setup.
	PX_FORCE_INLINE const PxPinnedArray<PartitionIndexData>& getPartitionIndexArray() const { return mPartitionIndexArray; }
	PX_FORCE_INLINE const PxPinnedArray<PartitionNodeData>& getPartitionNodeArray() const { return mPartitionNodeArray; }
	PX_FORCE_INLINE const PxInt32ArrayPinned& getStartSlabPerPartition() const { return mStartSlabPerPartition; }
	PX_FORCE_INLINE const PxInt32ArrayPinned& getArticStartSlabPerPartition() const { return mArticStartSlabPerPartition; }
	PX_FORCE_INLINE const PxInt32ArrayPinned& getNbJointsPerPartition() const { return mNbJointsPerPartition; }
	PX_FORCE_INLINE const PxInt32ArrayPinned& getNbArticJointsPerPartition() const { return mNbArtiJointsPerPartition; }
	PX_FORCE_INLINE const PxInt32ArrayPinned& getNodeInteractionCountArray() const { return mNodeInteractionCountArray; }
	PX_FORCE_INLINE const PxInt32ArrayPinned& getDestroyedContactEdgeIndices() const { return mDestroyedContactEdgeIndices; }
	PX_FORCE_INLINE const PxArray<PxU32>& getNpIndexArray() const { return mNpIndexArray; }
	PX_FORCE_INLINE const PxArray<PartitionSlab*>& getPartitionSlabs() const { return mPartitionSlabs; }
	PX_FORCE_INLINE const PxArray<PxU32>& getContactStartIndices() const { return mContactStartIndices; }
	PX_FORCE_INLINE const PxArray<PxU32>& getJointStartIndices() const { return mJointStartIndices; }
	PX_FORCE_INLINE const PxArray<PxU32>& getArtiContactStartIndices() const { return mArtiContactStartIndices; }
	PX_FORCE_INLINE const PxArray<PxU32>& getArtiJointStartIndices() const { return mArtiJointStartIndices; }
	PX_FORCE_INLINE PxU32 getCSlabAccumulatedPartitionCount(PxU32 index) const
	{
		PX_ASSERT(index<32);
		return mCSlab.mPartitionArray[index].mAccumulatedPartitionCount;
	}
	PX_FORCE_INLINE PxU32 getCSlabAccumulatedArtiPartitionCount(PxU32 index) const
	{
		PX_ASSERT(index<32);
		return mCSlab.mPartitionArray[index].mAccumulatedArtiPartitionCount;
	}
#if PX_ENABLE_ASSERTS
	PX_FORCE_INLINE const PxArray<PxU32>& getAccumulatedPartitionCount() const { return mAccumulatedPartitionCount; }
	PX_FORCE_INLINE const PxArray<PxU32>& getAccumulatedConstraintCount() const { return mAccumulatedConstraintCount; }
	PX_FORCE_INLINE const PxArray<PxU32>& getAccumulatedArtiPartitionCount() const { return mAccumulatedArtiPartitionCount; }
	PX_FORCE_INLINE const PxArray<PxU32>& getAccumulatedArtiConstraintCount() const { return mAccumulatedArtiConstraintCount; }
#endif
	PX_FORCE_INLINE PxU32 getNbContactBatches() const { return mNbContactBatches; }
	PX_FORCE_INLINE PxU32 getNbConstraintBatches() const { return mNbConstraintBatches; }
	PX_FORCE_INLINE PxU32 getNbArtiContactBatches() const { return mNbArtiContactBatches; }
	PX_FORCE_INLINE PxU32 getNbArtiConstraintBatches() const { return mNbArtiConstraintBatches; }
	PX_FORCE_INLINE PxU32 getTotalContacts() const { return mTotalContacts; }
	PX_FORCE_INLINE PxU32 getTotalConstraints() const { return mTotalConstraints; }
	PX_FORCE_INLINE PxU32 getTotalArticulationContacts() const { return mTotalArticulationContacts; }
	PX_FORCE_INLINE PxU32 getTotalArticulationConstraints() const { return mTotalArticulationConstraints; }
	PX_FORCE_INLINE bool hasForceThresholds() const { return mNbForceThresholds!=0; }
//private: // PT: TODO: revisit after the dust settles
#if USE_SPLIT_SECOND_PASS_ISLAND_GEN
	PxBitMap mActiveCMBitmapCopy;
#endif
	void reserveNodes(PxU32 nodeCount);
	void getPreviousAndNextReferencesInSlab(NodeEntryDecoded& prev, NodeEntryDecoded& next, PxU32 index, PxU32 uniqueId, const PartitionSlab* slab, PxU32 slabMask) const;
	// Two-stage edge insertion: stage 1 creates the PartitionEdge, stage 2
	// assigns it to partitions.
	PartitionEdge* addEdge_Stage1(const IG::IslandSim& islandSim, IG::EdgeIndex edgeIndex, PxU32 patchIndex, PxU32 npIndex, PxNodeIndex node1, PxNodeIndex node2);
	void addEdge_Stage2(IG::GPUExternalData& islandSimGpuData, IG::EdgeIndex edgeIndex, PartitionEdge* partitionEdge, bool specialHandled, bool doPart1, bool doPart2);
	bool addJointManager(const PartitionEdge* edge, PxgBodySimManager& bodySimManager);
	bool addContactManager(PartitionEdge* edge, const PxcNpWorkUnit& unit, PxgBodySimManager& bodySimManager);
	void removeEdge(PartitionEdge* edge, IG::GPUExternalData& islandSimGpuData, PxgBodySimManager& manager);
	PX_FORCE_INLINE void removeAllEdges(IG::GPUExternalData& islandSimGpuData, PxgBodySimManager& bodySimManager, PartitionEdge* partitionEdge);
	void destroyEdges(const IG::CPUExternalData& islandSimCpuData, IG::GPUExternalData& islandSimGpuData, PxgBodySimManager& bodySimManager, PxgJointManager& jointManager, bool clearDestroyedEdges, bool recordDestroyedEdges);
	void addEdgeInternal(const PartitionEdge* PX_RESTRICT partitionEdge, PartitionSlab* PX_RESTRICT slab, PxU16 id, PxU16 baseId);
	void removeEdgeInternal(PartitionSlab* PX_RESTRICT slab, const PartitionEdge* PX_RESTRICT edge, PxU32 id);
	void doCompaction();
#if PX_PARTITION_COMPACTION
	void pullForwardConstraints(PxU32 nodeIndex);
#endif
public: // PT: TODO: revisit after the dust settles
	// Multi-part incremental update pipeline; Part2 is itself split into
	// sub-steps (2_0 .. 2_2) plus an edge-case pass, then Part3 finalizes joints.
	void updateIncrementalIslands_Part1(
		IG::IslandSim& islandSim, const IG::AuxCpuData& islandManagerData,
		PxsContactManagerOutputIterator& iterator,
		PxgBodySimManager& bodySimManager, PxgJointManager& jointManager);
	void updateIncrementalIslands_Part2(
		IG::IslandSim& islandSim, const IG::AuxCpuData& islandManagerData,
		PxsContactManagerOutputIterator& iterator,
		PxgBodySimManager& bodySimManager);
	// Classification of edges that need special partitioning treatment
	// (static bodies, articulation self-collision, static-vs-articulation).
	enum SpecialCase
	{
		SPECIAL_CASE_NONE,
		SPECIAL_CASE_STATIC_RB,
		SPECIAL_CASE_ARTI_SELF,
		SPECIAL_CASE_STATIC_ARTI0,
		SPECIAL_CASE_STATIC_ARTI1
	};
	// Work item for the Part2 pass: one edge/patch pair plus its special-case tag.
	struct Part2WorkItem
	{
		PxU32 mEdgeID;
		PxU16 mPatchIndex;
		PxU16 mSpecialCase;
		PartitionEdge* mPartitionEdge;
	};
	PxArray<Part2WorkItem> mPart2WorkItems;
	PxArray<PxU32> mPart2EdgeCases; // PT: indices into mPart2WorkItems
	void updateIncrementalIslands_Part2_0(IG::IslandSim& islandSim, const IG::AuxCpuData& islandManagerData, PxsContactManagerOutputIterator& iterator);
	void updateIncrementalIslands_Part2_1(PxU32 startIndex, PxU32 nbToProcess, IG::IslandSim& islandSim, const IG::AuxCpuData& islandManagerData);
	void updateIncrementalIslands_Part2_2(IG::IslandSim& islandSim, PxgBodySimManager& bodySimManager, bool dopart1, bool dopart2, bool dopart3);
	void updateIncrementalIslands_Part2_2_ProcessEdgeCases(IG::IslandSim& islandSim);
	void updateIncrementalIslands_Part3(IG::IslandSim& islandSim, PxgJointManager& jointManager);
	// Multi-threaded variant of lost-patch processing (tasks spawned via flushPool).
	void processLostPatchesMT(	IG::IslandSim& islandSim, Cm::FlushPool& flushPool, PxBaseTask* continuation,
								PxsContactManager** lostFoundPatchManagers, PxU32 nbLostFoundPatchManagers, const PxsContactManagerOutputCounts* lostFoundPairOutputs,
								PxgBodySimManager& bodySimManager, PxgJointManager& jointManager);
	// Batch of partition edges handed to a worker task.
	struct PartitionEdgeBatch : public PxUserAllocated
	{
		PxArray<PartitionEdge*> mEdges;
	};
	PxArray<PartitionEdgeBatch*> mBatches;
};
}
#endif

View File

@@ -0,0 +1,125 @@
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
#ifndef PXG_CONSTRAINT_PREP_H
#define PXG_CONSTRAINT_PREP_H
#include "PxConstraintDesc.h"
#include "PxgSolverConstraintDesc.h"
#include "PxgD6JointData.h"
#include "AlignedTransform.h"
#include "PxcNpWorkUnit.h"
namespace physx
{
typedef PxcNpWorkUnitFlag PxgNpWorkUnitFlag;
struct PxgSolverBody;
struct PxgSolverBodyData;
struct PxgSolverConstraintDesc;
// Per-joint pre-prep data; this structure can be updated every frame.
// Trailing comments give the running byte offset and member size.
struct PxgConstraintPrePrep
{
public:
	PxNodeIndex mNodeIndexA;	//8	8	island-sim node of body A
	PxNodeIndex mNodeIndexB;	//16	8	island-sim node of body B
	PxU32 mFlags;				//20	4
	float mLinBreakForce;		//24	4	linear break-force threshold
	float mAngBreakForce;		//28	4	angular break-force threshold
};
// Per-joint constraint header consumed by GPU constraint prep.
// Naming convention: the trailing "...W" member is packed into the float4's w.
struct PxgConstraintData
{
	PxgConstraintInvMassScale mInvMassScale;	//16
	float4 mRaWorld_linBreakForceW;				//16	xyz = rA in world space, w = linear break force
	float4 mRbWorld_angBreakForceW;				//16	xyz = rB in world space, w = angular break force
	uint4 mNumRows_Flags_StartIndex;			//16	x = row count, y = flags, z = start index (per the member name)
};
// 32-wide SoA block layout of joint header data for batched GPU access.
// Trailing comments are "end-offset size" in bytes (previous values were stale:
// 3x float4[32] = 512 bytes each, PxU32[32] = 128 bytes each).
struct PxgBlockConstraint1DData
{
	PX_ALIGN(256, PxgConstraintInvMassScale mInvMassScale[32]);	//512	512
	PX_ALIGN(256, float4 mRAWorld_linBreakForce[32]);			//1024	512
	PX_ALIGN(256, float4 mRBWorld_AngBreakForce[32]);			//1536	512
	PX_ALIGN(128, PxU32 mNumRows[32]);							//1664	128
	PX_ALIGN(128, PxU32 mFlags[32]);							//1792	128
};
// Non-block (AoS) variant of the joint header data.
// Trailing comments are "size running-offset" in bytes; mPadding keeps the
// struct at 48 bytes.
struct PxgConstraint1DData
{
	PxgConstraintInvMassScale mInvMassScale;	//16	16
	float4 mBody0WorldOffset_linBreakForce;		//16	32	xyz = body0 world offset, w = linear break force
	float mAngBreakForce;						//4	36
	PxU32 mNumRows;								//4	40
	PxU32 mFlags;								//4	44
	PxU32 mPadding;								//4	48	explicit padding to 48 bytes
};
// Per-contact-manager material/contact summary, 16 bytes and 16-byte aligned
// for efficient GPU loads. Trailing comments are "end-offset size" in bytes.
struct /*__device_builtin__*/ __builtin_align__(16) PxgMaterialContactData
{
	PxReal restDistance;			//4		4
	PxReal staticFriction;			//8		4
	PxReal dynamicFriction;			//12	4
	PxU8 mNumContacts;				//13	1
	PxU8 mSolverFlags;				//14	1
	PxU8 prevFrictionPatchCount;	//15	1
	PxU8 pad;						//16	1	explicit padding to 16 bytes
};
// 32-wide SoA block layout of contact header data for batched GPU access.
// Trailing comments are "end-offset size" in bytes.
struct PxgBlockContactData
{
	PX_ALIGN(128, PxgConstraintInvMassScale mInvMassScale[32]);	//512	512
	PX_ALIGN(128, float4 normal_restitutionW[32]);				//1024	512	xyz = contact normal, w = restitution
	PX_ALIGN(128, PxgMaterialContactData contactData[32]);		//1536	512
	PX_ALIGN(128, PxReal damping[32]);							//1664	128
};
// Non-block (AoS) contact header; this variant is used for articulation contacts.
struct PxgContactData
{
	PxgConstraintInvMassScale mInvMassScale;
	float4 normal_restitutionW;		// xyz = contact normal, w = restitution
	PxgMaterialContactData contactData;
};
// 32-wide SoA block of contact points for batched GPU access.
struct PxgBlockContactPoint
{
	PX_ALIGN(256, float4 point_separationW[32]);		// xyz = contact point, w = separation
	PX_ALIGN(256, float4 targetVel_maxImpulseW[32]);	// xyz = target velocity, w = max impulse
};
// Non-block (AoS) contact point; same packing as PxgBlockContactPoint lanes.
struct PxgContactPoint
{
	float4 point_separationW;		// xyz = contact point, w = separation
	float4 targetVel_maxImpulseW;	// xyz = target velocity, w = max impulse
};
}
#endif

View File

@@ -0,0 +1,44 @@
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
#ifndef PXG_CONSTRAINT_WRITE_BACK_H
#define PXG_CONSTRAINT_WRITE_BACK_H
#include "foundation/PxPreprocessor.h"
#include "vector_types.h"
namespace physx
{
// Solver write-back record for a joint: accumulated impulses with extra
// scalars packed into the w components (per the member-name suffixes:
// w = broken flag / solver residual — TODO confirm exact encoding).
struct PxgConstraintWriteback
{
	float4 linearImpulse_broken;
	float4 angularImpulse_residual;
};
}
#endif

View File

@@ -0,0 +1,603 @@
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
#ifndef PXG_CONTEXT_H
#define PXG_CONTEXT_H
#include "DyContext.h"
#include "PxSimulationStatistics.h"
#include "PxgConstraintPartition.h"
#include "PxgCudaMemoryAllocator.h"
#include "PxgConstraintPrep.h"
#include "PxvNphaseImplementationContext.h"
namespace physx
{
class PxgCudaBroadPhaseSap;
class PxgSolverCore;
class PxgGpuNarrowphaseCore;
class PxgArticulationCore;
class PxgSimulationCore;
class PxgSoftBodyCore;
class PxgFEMClothCore;
class PxgPBDParticleSystemCore;
class PxgSimulationController;
struct PxgIslandContext;
class PxgHeapMemoryAllocatorManager;
struct PxsTorsionalFrictionData;
// PT: TODO: all these tasks are missing a proper context ID for the profiler...
// CPU task that converts a range of joints into the GPU pre-prep format.
// Processes mNbToProcess joints starting at mStartIndex, writing into the
// constraint data/row buffers at mGpuJointOffset and accumulating per-joint
// row counts into mRowCounts.
class PxgCpuJointPrePrepTask : public Cm::Task
{
	PxgSimulationController& mSimController;
	const Dy::Constraint*const* mConstraints;	// input joints (not owned)
	PxgConstraintData* mConstraintData;			// output: per-joint constraint header data
	Px1DConstraint* mConstraintRows;			// output: per-joint 1D constraint rows
	const PxU32 mStartIndex;
	const PxU32 mNbToProcess;
	const PxU32 mGpuJointOffset;
	PxI32* mRowCounts;							// output: row counts shared across tasks
	PX_NOCOPY(PxgCpuJointPrePrepTask)
public:
	// Cm::Task(0): these tasks don't carry a proper profiler context ID yet.
	// (Also fixed the "simConstroller" parameter-name typo.)
	PxgCpuJointPrePrepTask(PxgSimulationController& simController, PxU32 startIndex, PxU32 nbToProcess, PxU32 gpuJointOffset,
		const Dy::Constraint*const* constraints, PxgConstraintData* constraintData, Px1DConstraint* constraintRows, PxI32* rowCounts) :
		Cm::Task(0), mSimController(simController), mConstraints(constraints), mConstraintData(constraintData), mConstraintRows(constraintRows),
		mStartIndex(startIndex), mNbToProcess(nbToProcess), mGpuJointOffset(gpuJointOffset), mRowCounts(rowCounts)
	{
	}
	virtual void runInternal() PX_OVERRIDE PX_FINAL;
	virtual const char* getName() const PX_OVERRIDE PX_FINAL
	{
		// Fix: previously reported "PxgTGSCpuJointPrePrepTask", which did not
		// match the class name (the task is not TGS-specific).
		return "PxgCpuJointPrePrepTask";
	}
};
class PxgGpuContext;
// CPU task wrapper: delegates pre-integration work to the owning PxgGpuContext
// (see PxgGpuContext::doPreIntegrationTaskCommon).
class PxgCpuPreIntegrationTask : public Cm::Task
{
	PxgGpuContext& mContext;
	PX_NOCOPY(PxgCpuPreIntegrationTask)
public:
	// Cm::Task(0): no proper profiler context ID yet (see file-level TODO).
	PxgCpuPreIntegrationTask(PxgGpuContext& context) : Cm::Task(0), mContext(context)
	{
	}
	virtual void runInternal() PX_OVERRIDE PX_FINAL;
	virtual const char* getName() const PX_OVERRIDE PX_FINAL
	{
		return "PxgCpuPreIntegrationTask";
	}
};
// CPU task pre-prepping a range of contact pairs from one partition into
// GPU constraint batch headers.
class PxgCpuContactPrePrepTask : public Cm::Task
{
	//From the below, we should be able to iterate over the partitions, process contact pairs
	const PxgIncrementalPartition& mPartition;		// source partitioning (read-only)
	const PxU32 mPartitionIndex;
	const PxU32 mStartIndexWithinPartition;
	const PxU32 mNbToProcess;
	const PxU32* mStartSlabIter;
	const PxU32 mStartSlabOffset;
	const PxU32* mContactStartIndices;
	PxgConstraintBatchHeader* mBatchHeaders;		// output: batch headers
	const PxU32 mNumBatches;
	const PxU32 mWorkUnitStartIndex;
	PxU32* mPinnedEdgeIds;							// output: edge ids written to pinned memory
	const PxsContactManagerOutputIterator& mOutputIterator;
	const PxU8* mBaseContactPatch;					// base address of the contact patch stream
	const PxU8* mBaseContactPointer;				// base address of the contact point stream
	PX_NOCOPY(PxgCpuContactPrePrepTask)
public:
	// Cm::Task(0): no proper profiler context ID yet (see file-level TODO).
	PxgCpuContactPrePrepTask(const PxgIncrementalPartition& partition, PxU32 partitionIndex, PxU32 startIndexWithinPartition, PxU32 nbToProcess,
		const PxU32* startSlabIter, PxU32 startSlabOffset, const PxU32* contactStartIndices,
		PxgConstraintBatchHeader* batchHeaders, PxU32 nbBatches, PxU32 workUnitStartIndex,
		PxU32* pinnedEdgeIds, PxsContactManagerOutputIterator& outputIter,
		const PxU8* baseContactPatch, const PxU8* baseContactPointer) : Cm::Task(0),
		mPartition(partition), mPartitionIndex(partitionIndex), mStartIndexWithinPartition(startIndexWithinPartition), mNbToProcess(nbToProcess),
		mStartSlabIter(startSlabIter), mStartSlabOffset(startSlabOffset), mContactStartIndices(contactStartIndices),
		mBatchHeaders(batchHeaders), mNumBatches(nbBatches), mWorkUnitStartIndex(workUnitStartIndex),
		mPinnedEdgeIds(pinnedEdgeIds), mOutputIterator(outputIter),
		mBaseContactPatch(baseContactPatch), mBaseContactPointer(baseContactPointer)
	{
	}
	virtual void runInternal() PX_OVERRIDE PX_FINAL;
	virtual const char* getName() const PX_OVERRIDE PX_FINAL
	{
		return "PxgCpuContactPrePrepTask";
	}
};
// CPU task pre-prepping a range of rigid-body joint edges into GPU constraint
// batch headers.
class PxgCpuConstraintPrePrepTask : public Cm::Task
{
	const PartitionIndices& mEdgeIds;			// unique edge ids for one partition/edge type
	const PxU32 mStartEdgeIdx;
	const PxU32 mNumEdges;
	PxgConstraintBatchHeader* mBatchHeaders;	// output: batch headers
	const PxU32 mNumBatches;
	const PxU32 mConstraintBlockStartIndex;
	const PxU32 mUniqueIdStartIndex;
	PxU32* mPinnedEdgeIds;						// output: edge ids written to pinned memory
	const PxgConstraintPrePrep* mConstraintPrePrep;
	PX_NOCOPY(PxgCpuConstraintPrePrepTask)
public:
	// Work-granularity per task, tuned separately for the TGS and PGS solvers.
	static const PxU32 NbConstraintsPerTaskTGS = 2048u;
	static const PxU32 NbConstraintsPerTaskPGS = 8192u;
	// Cm::Task(0): no proper profiler context ID yet (see file-level TODO).
	PxgCpuConstraintPrePrepTask(const PartitionIndices& edgeIds, PxU32 startEdgeIdx, PxU32 nbEdges, PxgConstraintBatchHeader* batchHeaders, PxU32 nbBatches,
		PxU32 constraintBlockStartIndex, PxU32 uniqueIdStartIndex, PxU32* pinnedEdgeIds,
		const PxgConstraintPrePrep* constraintPrePrep) : Cm::Task(0),
		mEdgeIds(edgeIds), mStartEdgeIdx(startEdgeIdx), mNumEdges(nbEdges), mBatchHeaders(batchHeaders), mNumBatches(nbBatches),
		mConstraintBlockStartIndex(constraintBlockStartIndex), mUniqueIdStartIndex(uniqueIdStartIndex), mPinnedEdgeIds(pinnedEdgeIds)
		, mConstraintPrePrep(constraintPrePrep)
	{
	}
	virtual void runInternal() PX_OVERRIDE PX_FINAL;
	virtual const char* getName() const PX_OVERRIDE PX_FINAL
	{
		return "PxgCpuConstraintPrePrepTask";
	}
};
// CPU task pre-prepping articulation edges; covers both contacts and joint
// constraints (selected via mIsContact).
class PxgCpuArtiConstraintPrePrepTask : public Cm::Task
{
	const PartitionIndices& mEdgeIds;			// unique edge ids for one partition/edge type
	const PxU32 mStartEdgeIdx;
	const PxU32 mNumEdges;
	PxgConstraintBatchHeader* mBatchHeaders;	// output: batch headers
	const PxU32 mNumBatches;
	const PxU32 mConstraintBlockStartIndex;
	const PxU32 mUniqueIdStartIndex;
	PxU32* mPinnedEdgeIds;						// output: edge ids written to pinned memory
	const PxgConstraintPrePrep* mConstraintPrePrep;
	const bool mIsContact;						// true => contact edges, false => joint edges
	PX_NOCOPY(PxgCpuArtiConstraintPrePrepTask)
public:
	// Work-granularity per task, tuned separately for the PGS and TGS solvers.
	static const PxU32 NbConstraintsPerTaskPGS = 8192u;
	static const PxU32 NbConstraintsPerTaskTGS = 512u;
	// Cm::Task(0): no proper profiler context ID yet (see file-level TODO).
	PxgCpuArtiConstraintPrePrepTask(const PartitionIndices& edgeIds, PxU32 startEdgeIdx, PxU32 nbEdges, PxgConstraintBatchHeader* batchHeaders,
		PxU32 nbBatches, PxU32 constraintBlockStartIndex, PxU32 uniqueIdStartIndex, PxU32* pinnedEdgeIds,
		const PxgConstraintPrePrep* constraintPrePrep, bool isContact) : Cm::Task(0),
		mEdgeIds(edgeIds), mStartEdgeIdx(startEdgeIdx), mNumEdges(nbEdges), mBatchHeaders(batchHeaders), mNumBatches(nbBatches),
		mConstraintBlockStartIndex(constraintBlockStartIndex), mUniqueIdStartIndex(uniqueIdStartIndex), mPinnedEdgeIds(pinnedEdgeIds)
		, mConstraintPrePrep(constraintPrePrep), mIsContact(isContact)
	{
	}
	virtual void runInternal() PX_OVERRIDE PX_FINAL;
	virtual const char* getName() const PX_OVERRIDE PX_FINAL
	{
		return "PxgCpuArtiConstraintPrePrepTask";
	}
};
// CPU task wrapper: delegates constraint prep work to the owning PxgGpuContext.
class PxgCpuPrepTask : public Cm::Task
{
	PxgGpuContext& mContext;
	PX_NOCOPY(PxgCpuPrepTask)
public:
	// Cm::Task(0): no proper profiler context ID yet (see file-level TODO).
	PxgCpuPrepTask(PxgGpuContext& context) : Cm::Task(0), mContext(context) {}
	virtual void runInternal() PX_OVERRIDE PX_FINAL;
	virtual const char* getName() const PX_OVERRIDE PX_FINAL
	{
		return "PxgCpuPrepTask";
	}
};
// Task wrapper: delegates GPU pre-prep work to the owning PxgGpuContext.
class PxgGpuPrePrepTask : public Cm::Task
{
	PxgGpuContext& mContext;
	PX_NOCOPY(PxgGpuPrePrepTask)
public:
	// Cm::Task(0): no proper profiler context ID yet (see file-level TODO).
	PxgGpuPrePrepTask(PxgGpuContext& context) : Cm::Task(0), mContext(context) {}
	virtual void runInternal() PX_OVERRIDE PX_FINAL;
	virtual const char* getName() const PX_OVERRIDE PX_FINAL
	{
		return "PxgGpuPrePrepTask";
	}
};
// Task wrapper: delegates post-solve work to the owning PxgGpuContext
// (see PxgGpuContext::doPostSolveTask).
class PxgPostSolveTask : public Cm::Task
{
	PxgGpuContext& mContext;
	PX_NOCOPY(PxgPostSolveTask)
public:
	// Cm::Task(0): no proper profiler context ID yet (see file-level TODO).
	PxgPostSolveTask(PxgGpuContext& context) : Cm::Task(0), mContext(context) {}
	virtual void runInternal() PX_OVERRIDE PX_FINAL;
	virtual const char* getName() const PX_OVERRIDE PX_FINAL
	{
		return "PxgPostSolveTask";
	}
};
// Task wrapper driving the GPU constraint solve. Parameterized with the node
// count and changed-handle bitmap before being dispatched each frame.
class PxgGpuTask : public Cm::Task
{
	PxgGpuContext& mContext;
	PxU32 mMaxNodes;
	PxBitMapPinned* mChangedHandleMap;//this is for the simulation controller
	PX_NOCOPY(PxgGpuTask)
public:
	// Cm::Task(0): no proper profiler context ID yet (see file-level TODO).
	PxgGpuTask(PxgGpuContext& context) : Cm::Task(0), mContext(context), mMaxNodes(0), mChangedHandleMap(NULL) {}
	// Must be called before the task runs; stores the per-frame inputs.
	void setMaxNodesAndWordCounts(const PxU32 maxNodes, PxBitMapPinned& changedHandleMap) { mMaxNodes = maxNodes; mChangedHandleMap = &changedHandleMap; }
	virtual void runInternal() PX_OVERRIDE PX_FINAL;
	virtual const char* getName() const PX_OVERRIDE PX_FINAL
	{
		return "PxgGpuTask";
	}
};
// Task wrapper: delegates GPU integration work to the owning PxgGpuContext.
class PxgGpuIntegrationTask : public Cm::Task
{
	PxgGpuContext& mContext;
	PX_NOCOPY(PxgGpuIntegrationTask)
public:
	// Cm::Task(0): no proper profiler context ID yet (see file-level TODO).
	PxgGpuIntegrationTask(PxgGpuContext& context) : Cm::Task(0), mContext(context) {}
	virtual void runInternal() PX_OVERRIDE PX_FINAL;
	virtual const char* getName() const PX_OVERRIDE PX_FINAL
	{
		return "PxgGpuIntegrationTask";
	}
};
class PxgGpuContext : public Dy::Context
{
PX_NOCOPY(PxgGpuContext)
public:
PxgGpuContext(Cm::FlushPool& flushPool, IG::SimpleIslandManager& islandManager,
PxU32 maxNumPartitions, PxU32 maxNumStaticPartitions, bool enableStabilization, bool useEnhancedDeterminism,
PxReal maxBiasCoefficient, PxvSimStats& simStats, PxgHeapMemoryAllocatorManager* heapMemoryManager,
PxReal lengthScale, bool enableDirectGPUAPI, PxU64 contextID, bool isResidualReportingEnabled, bool isTGS);
virtual ~PxgGpuContext();
PX_FORCE_INLINE PxgSolverCore* getGpuSolverCore() { return mGpuSolverCore;}
PX_FORCE_INLINE PxgArticulationCore* getArticulationCore() { return mGpuArticulationCore; }
PX_FORCE_INLINE PxgGpuNarrowphaseCore* getNarrowphaseCore() { return mGpuNpCore; }
PX_FORCE_INLINE PxgSimulationCore* getSimulationCore() { return mGpuSimulationCore; }
PX_FORCE_INLINE PxgCudaBroadPhaseSap* getGpuBroadPhase() { return mGpuBp; }
PX_FORCE_INLINE PxgSoftBodyCore* getGpuSoftBodyCore() { return mGpuSoftBodyCore; }
PX_FORCE_INLINE PxgFEMClothCore* getGpuFEMClothCore() { return mGpuFEMClothCore; }
PX_FORCE_INLINE PxgParticleSystemCore** getGpuParticleSystemCores() { return mGpuParticleSystemCores.begin(); }
PX_FORCE_INLINE PxU32 getNbGpuParticleSystemCores() { return mGpuParticleSystemCores.size(); }
PxgParticleSystemCore* getGpuParticleSystemCore();
PX_FORCE_INLINE PxU32 getCurrentContactStreamIndex() { return mCurrentContactStream; }
PX_FORCE_INLINE Cm::FlushPool& getFlushPool() { return mFlushPool; }
PX_FORCE_INLINE PxU8* getPatchStream(const PxU32 index) { return mPatchStreamAllocators[index]->mStart; }
PX_FORCE_INLINE PxU8* getContactStream(const PxU32 index) { return mContactStreamAllocators[index]->mStart; }
PX_FORCE_INLINE bool enforceConstraintWriteBackToHostCopy() const { return mEnforceConstraintWriteBackToHostCopy; }
//this method makes sure we get a PxgSimulationController instead of a PxsSimulationController
PxgSimulationController* getSimulationController();
virtual void setSimulationController(PxsSimulationController* mSimulationController) PX_OVERRIDE;
virtual void mergeResults() PX_OVERRIDE;
virtual void getDataStreamBase(void*& contactStreamBase, void*& patchStreamBase, void*& forceAndIndexStreamBase) PX_OVERRIDE;
virtual void updateBodyCore(PxBaseTask* continuation) PX_OVERRIDE;
virtual void update( Cm::FlushPool& flushPool, PxBaseTask* continuation, PxBaseTask* postPartitioningTask, PxBaseTask* lostTouchTask,
PxvNphaseImplementationContext* nphase, PxU32 maxPatchesPerCM, PxU32 maxArticulationLinks, PxReal dt,
const PxVec3& gravity, PxBitMapPinned& changedHandleMap) PX_OVERRIDE;
virtual void updatePostPartitioning( PxBaseTask* lostTouchTask,
PxvNphaseImplementationContext* nphase, PxU32 maxPatchesPerCM, PxU32 maxArticulationLinks, PxReal dt,
const PxVec3& gravity, PxBitMapPinned& changedHandleMap) PX_OVERRIDE;
virtual void setActiveBreakableConstraintCount(PxU32 activeBreakableConstraintCount) PX_OVERRIDE
{
mEnforceConstraintWriteBackToHostCopy = (activeBreakableConstraintCount > 0);
}
//this is the pre-prepare code for block format joints loaded from the non-block format joints
void doConstraintJointBlockPrePrepGPU();
void doStaticArticulationConstraintPrePrep(physx::PxBaseTask* continuation, const PxU32 articulationConstraintBatchIndex, const PxU32 articulationContactBatchIndex);
void doStaticRigidConstraintPrePrep(physx::PxBaseTask* continuation);
void doConstraintSolveGPU(PxU32 maxNodes, PxBitMapPinned& changedHandleMap);
void doPostSolveTask(physx::PxBaseTask* continuation);
virtual void processPatches( Cm::FlushPool& flushPool, PxBaseTask* continuation,
PxsContactManager** lostFoundPatchManagers, PxU32 nbLostFoundPatchManagers, PxsContactManagerOutputCounts* outCounts) PX_OVERRIDE;
bool isTGS() const { return mIsTGS; }
bool isExternalForcesEveryTgsIterationEnabled() const { return mIsExternalForcesEveryTgsIterationEnabled; }
void doPreIntegrationTaskCommon(physx::PxBaseTask* continuation);
void doConstraintPrePrepCommon(physx::PxBaseTask* continuation);
void doConstraintPrePrepGPUCommon(bool hasForceThresholds);
void cpuJointPrePrepTask(physx::PxBaseTask* continuation);
void allocateTempPinnedSolverMemoryCommon();
PX_FORCE_INLINE bool getEnableDirectGPUAPI() const { return mEnableDirectGPUAPI; }
PxvSimStats& getSimStats() { return mSimStats; }
PxBaseTask* mLostTouchTask;
PxU32 mTotalEdges;
PxU32 mTotalPreviousEdges;
PxsContactManagerOutputIterator mOutputIterator;
// Non-owning views over per-contact-manager data mirrored for the GPU (ownership/lifetime managed elsewhere - verify)
PxReal* mGPURestDistances;
Sc::ShapeInteraction** mGPUShapeInteractions;
PxsTorsionalFrictionData* mGPUTorsionalData;
Cm::FlushPool& mFlushPool;
bool mSolvedThisFrame;
PxgIncrementalPartition mIncrementalPartition;
PxPinnedArray<PxNodeIndex> mActiveNodeIndex; //this will change every frame; includes rigid bodies and articulations
// "World" stand-in bodies used for constraints against the static environment
PxgSolverBody mWorldSolverBody;
PxgSolverBodyData mWorldSolverBodyData;
PxgSolverBodySleepData mWorldSolverBodySleepData;
PxgSolverTxIData mWorldTxIData;
PxPinnedArray<PxgSolverBody> mSolverBodyPool;
PxPinnedArray<PxAlignedTransform> mBody2WorldPool;
//write back from the active articulation
//each articulation has max 64 links, max 3 * 63 dofs and 1 wake counter
//see PxgArticulationLinkJointRootStateData
PxInt8ArrayPinned mLinkAndJointAndRootStateDataPool;
PxPinnedArray<PxgSolverBodySleepData> mArticulationSleepDataPool;
PxPinnedArray<Dy::ErrorAccumulator> mInternalResidualPerArticulationVelIter; //Internal residuals in first half (do not include residuals from external constraints, e. g. contacts or PxConstraints), second half contains residual from contacts
PxPinnedArray<Dy::ErrorAccumulator> mInternalResidualPerArticulationPosIter; //Internal residuals in first half (do not include residuals from external constraints, e. g. contacts or PxConstraints), second half contains residual from contacts
// Per-type constraint batch index lists and per-partition batch counts (pinned for GPU DMA)
PxInt32ArrayPinned m1dConstraintBatchIndices;
PxInt32ArrayPinned mContactConstraintBatchIndices;
PxInt32ArrayPinned mArti1dConstraintBatchIndices;
PxInt32ArrayPinned mArtiContactConstraintBatchIndices;
PxInt32ArrayPinned mConstraintsPerPartition;
PxInt32ArrayPinned mArtiConstraintsPerPartition;
PxPinnedArray<PxgSolverBodyData> mSolverBodyDataPool;
PxPinnedArray<PxgSolverBodySleepData> mSolverBodySleepDataPool;
PxPinnedArray<PxgSolverTxIData> mSolverTxIDataPool;
// Pinned host allocators; contact/patch streams are double-buffered (see mCurrentContactStream)
PxgPinnedHostLinearMemoryAllocator* mPinnedMemoryAllocator;
PxgPinnedHostLinearMemoryAllocator* mContactStreamAllocators[2];
PxgPinnedHostLinearMemoryAllocator* mPatchStreamAllocators[2];
PxgPinnedHostLinearMemoryAllocator* mForceStreamAllocator;
PxgPinnedHostLinearMemoryAllocator* mFrictionPatchStreamAllocator;
PxU32 mCurrentContactStream;
PxgIslandContext* mIslandContextPool;
PxU32 mNumIslandContextPool;
PxU32 mNum1DConstraintBlockPrepPool; //this and mNum1dConstraintBatches is the same, we can get rid of it later
PxU32 mNumContactManagers;
PxU32 mNum1DConstraints;
PxU32 mKinematicCount;
PxU32 mArticulationCount;
PxU32 mArticulationStartIndex; //record the start node index in the mActiveNodeIndex for articulation
PxU32 mBodyCount;
// Batch counts per constraint category (PxI32: used with atomic/interlocked updates? - TODO confirm)
PxI32 mNumContactBatches;
PxI32 mNum1dConstraintBatches;
PxI32 mNumArtiContactBatches;
PxI32 mNumArti1dConstraintBatches;
PxI32 mNumStaticArtiContactBatches;
PxI32 mNumStaticArti1dConstraintBatches;
PxI32 mNumSelfArtiContactBatches;
PxI32 mNumSelfArti1dConstraintBatches;
PxI32 mNumStaticRigidContactBatches;
PxI32 mNumStaticRigid1dConstraintBatches;
PxI32 mArtiStaticConstraintBatchOffset;
PxI32 mArtiStaticContactBatchOffset;
//KS - we can't know this on CPU because the offset comes after the articulation constraints, which
//are computed on GPU
//PxI32 mRigidStaticConstraintBatchOffset;
//PxI32 mRigidStaticContactBatchOffset;
// Raw unique-index/start/count arrays consumed by constraint pre-prep (allocation/lifetime managed elsewhere - verify)
PxU32* mConstraintUniqueIndices;
PxU32* mContactUniqueIndices;
PxU32* mArtiConstraintUniqueIndices;
PxU32* mArtiContactUniqueIndices;
PxU32* mArtiStaticConstraintUniqueIndices;
PxU32* mArtiStaticContactUniqueIndices;
PxU32* mArtiSelfConstraintUniqueIndices;
PxU32* mArtiSelfContactUniqueIndices;
PxU32* mArtiStaticConstraintStartIndex;
PxU32* mRigidStaticConstraintUniqueIndices;
PxU32* mRigidStaticContactUniqueIndices;
PxU32* mArtiStaticConstraintCount;
PxU32* mArtiStaticContactStartIndex;
PxU32* mArtiStaticContactCount;
PxU32* mRigidStaticConstraintStartIndex;
PxU32* mRigidStaticConstraintCount;
PxI32 mCachedPositionIterations;
PxI32 mCachedVelocityIterations;
// Pinned per-body/per-articulation static & self interaction bookkeeping (DMAed to the GPU)
PxInt32ArrayPinned mArtiStaticContactCounts;
PxInt32ArrayPinned mArtiStaticJointCounts;
PxInt32ArrayPinned mArtiStaticContactIndices;
PxInt32ArrayPinned mArtiStaticJointIndices;
PxInt32ArrayPinned mArtiSelfContactCounts;
PxInt32ArrayPinned mArtiSelfJointCounts;
PxInt32ArrayPinned mArtiSelfContactIndices;
PxInt32ArrayPinned mArtiSelfJointIndices;
PxInt32ArrayPinned mRigidStaticContactCounts;
PxInt32ArrayPinned mRigidStaticJointCounts;
PxInt32ArrayPinned mRigidStaticContactIndices;
PxInt32ArrayPinned mRigidStaticJointIndices;
PxInt32ArrayPinned mNodeIndicesStagingBuffer;
PxInt32ArrayPinned mIslandIds;
PxInt32ArrayPinned mIslandStaticTouchCounts;
//other joint type(not d6) cpu constraints
PxgConstraintBatchHeader* mConstraintBatchHeaders;
PxgConstraintBatchHeader* mArticConstraintBatchHeaders;
PxU32 mNumConstraintBatches;
PxU32 mNumArticConstraintBatches;
PxU32 mNumArtiStaticConstraintBatches;
PxU32 mNumArtiSelfConstraintBatches;
PxU32 mNumRigidStaticConstraintBatches;
bool mHasForceThresholds;
const bool mIsTGS;
bool mIsExternalForcesEveryTgsIterationEnabled;
// Non-owning pointers to the sibling GPU subsystem cores
PxgCudaBroadPhaseSap* mGpuBp;
PxgGpuNarrowphaseCore* mGpuNpCore;
PxgArticulationCore* mGpuArticulationCore;
PxgSimulationCore* mGpuSimulationCore;
PxgSoftBodyCore* mGpuSoftBodyCore;
PxgFEMClothCore* mGpuFEMClothCore;
PxArray<PxgParticleSystemCore*> mGpuParticleSystemCores;
PxgParticleSystemCore* mGpuPBDParticleSystemCore;
PxgSolverCore* mGpuSolverCore;
PxU32 mMaxNumStaticPartitions;
const bool mEnableDirectGPUAPI;
bool mRecomputeArticulationBlockFormat;
// when Direct GPU API is enabled, the constraint writeback data might have to be copied to host to
// support breakable D6 joints
bool mEnforceConstraintWriteBackToHostCopy;
// Task objects driving the per-frame pipeline
PxgCpuPreIntegrationTask mPreIntegrationTask;
PxgCpuPrepTask mPrepTask;
PxgGpuPrePrepTask mGpuPrePrepTask;
PxgGpuIntegrationTask mGpuIntegrationTask;
PxgGpuTask mGpuTask; //this task include preprepare constraint, prepare constraint, solve and integration tasks
PxgPostSolveTask mPostSolveTask;
//this is the constraint-preparation code (runs after pre-prepare)
void doConstraintPrepGPU();
void doPreIntegrationGPU(); //this is the pre integration code(copying data from pxgbodysim to solver body data)
void doArticulationGPU(); //this is the articulation forward dynamic code
void doSoftbodyGPU();//this is the soft body update tetrahedron rotations code
//this is the FEM cloth update code (counterpart of doSoftbodyGPU for cloth)
void doFEMClothGPU();
void doConstraintPrePrepGPU(); //this is the pre-prepare code for block format contacts and non-block format joints
};
}
#endif

View File

@@ -0,0 +1,208 @@
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
#ifndef PXG_CUDA_SOLVERCORE_H
#define PXG_CUDA_SOLVERCORE_H
#include "PxgSolverCore.h"
namespace physx
{
// PT: TODO: rename to PxgPGSSolverCore ?
class PxgCudaSolverCore : public PxgSolverCore
{
	PX_NOCOPY(PxgCudaSolverCore)
private:
	//This is the warp-based constraint format. TODO - remove!
	PxgTypedCudaBuffer<PxgSolverContactHeader>      mContactHeaderStream;
	PxgTypedCudaBuffer<PxgSolverFrictionHeader>     mFrictionHeaderStream;
	PxgTypedCudaBuffer<PxgSolverContactPointExt>    mContactStream;
	PxgTypedCudaBuffer<PxgSolverContactFrictionExt> mFrictionStream;
	// Each bit encodes the activation of a slab (32 bits). When there are more than 32 slabs, use multiple indices.
	// To query the reference count, count the number of active slabs/bits.
	PxgTypedCudaBuffer<PxU32> mSolverEncodedReferenceCount;
	//This is the new articulation block constraint format!
	//It shares the original rigid body contact/constraint format but adds in
	//an additional buffer for the response vectors
	PxgTypedCudaBuffer<PxgArticulationBlockResponse> mArtiConstraintBlockResponse;
	// Force-threshold reporting streams (device side)
	PxgTypedCudaBuffer<Dy::ThresholdStreamElement> mForceThresholdStream;
	PxgTypedCudaBuffer<Dy::ThresholdStreamElement> mTmpForceThresholdStream;
	// Per-category batch index lists (device side)
	PxgTypedCudaBuffer<PxU32> mConstraint1DBatchIndices;
	PxgTypedCudaBuffer<PxU32> mContactBatchIndices;
	PxgTypedCudaBuffer<PxU32> mArtiContactBatchIndices;
	PxgTypedCudaBuffer<PxU32> mArtiConstraint1dBatchIndices;
	PxgTypedCudaBuffer<PxReal> mAccumulatedForceObjectPairs; //store the accumulated force for a pair of objects
	PxgCudaBufferN<2> mExceededForceElements;
	PxgTypedCudaBuffer<Dy::ThresholdStreamElement> mForceChangeThresholdElements;
	// Scratch buffers for the threshold-stream accumulation/radix-sort passes
	PxgTypedCudaBuffer<PxReal> mThresholdStreamAccumulatedForce;
	PxgTypedCudaBuffer<PxReal> mBlocksThresholdStreamAccumulatedForce;
	PxgTypedCudaBuffer<PxU32> mThresholdStreamWriteIndex;
	PxgTypedCudaBuffer<PxU32> mBlocksThresholdStreamWriteIndex;
	PxgTypedCudaBuffer<bool> mThresholdStreamWriteable;
	PxgTypedCudaBuffer<PxU32> mIslandIds;
	PxgTypedCudaBuffer<PxU32> mIslandStaticTouchCount;
	// Shared solver descriptor for the iterative (PGS) solve - constructed by constructSolverSharedDesc()
	PxgSolverSharedDesc<IterativeSolveData>* mSharedDesc;
	// Multi-pass radix sort over the force-threshold stream
	void radixSort(const PxU32 nbPasses);
	friend class PxgArticulationCore;
public:
	bool mFrictionEveryIteration;
	PxgCudaSolverCore(PxgCudaKernelWranglerManager* gpuKernelWrangler, PxCudaContextManager* cudaContextManager, PxgGpuContext* dynamicContext,
		PxgHeapMemoryAllocatorManager* heapMemoryManager, const PxGpuDynamicsMemoryConfig& init,
		const bool frictionEveryIteration);
	~PxgCudaSolverCore();
	// Fills in the shared solver descriptor consumed by the solve kernels.
	void constructSolverSharedDesc(PxgSolverSharedDesc<IterativeSolveData>& desc,
		const PxgConstantData& cData, Cm::UnAlignedSpatialVector* deferredZ, PxU32* articulationDirty, uint4* articulationSlabMask);
	// Fills in the constraint-prepare descriptor from the per-category batch counts.
	// NOTE(review): method name misspells "Constraint" - renaming would require touching the
	// out-of-line definition and all callers, so it is left as-is here.
	void constructConstraitPrepareDesc(PxgConstraintPrepareDesc& desc, const PxU32 numDynamicConstraintBatchHeader,
		const PxU32 numStaticConstraintBatchHeaders, const PxU32 numDynamic1dConstraintBatches, const PxU32 numStatic1dConstraintBatches,
		const PxU32 numDynamicContactBatches, const PxU32 numStaticContactBatches,
		const PxU32 numArtiConstraints, const PxU32 numArtiContacts,
		const PxU32 numArtiStatic1dConstraintBatches, const PxU32 numArtiStaticContactBatches,
		const PxU32 numArtiSelf1dConstraintBatches, const PxU32 numArtiSelfContactBatches,
		const PxgConstantData& cData, PxU32 totalCurrentEdges,
		PxU32 totalPreviousEdges, PxU32 totalBodies);
	void constructSolverDesc(PxgSolverCoreDesc& desc, PxU32 numIsland, PxU32 numSolverBodies, PxU32 numConstraintBatchHeader, PxU32 numArticConstraints, PxU32 numSlabs, bool enableStabilization);
	void syncSimulationController();
	// Stream/context lifetime management (overridable by derived solver cores)
	virtual void createStreams();
	virtual void releaseStreams();
	virtual void acquireContext();
	virtual void releaseContext();
	PxU32 getDescriptorsAllocationSize();
	void allocatePinnedDescriptors(PxgPinnedHostLinearMemoryAllocator& hostAllocator);
	// DMA the compressed contact/patch streams and partition bookkeeping up to the device.
	void gpuMemDMAUpContactData(PxgPinnedHostLinearMemoryAllocator* compressedContactsHostMemoryAllocator,
		PxU32 compressedContactStreamUpperPartSize,
		PxU32 compressedContactStreamLowerPartSize,
		PxgPinnedHostLinearMemoryAllocator* compressedPatchesHostMemoryAllocator,
		PxU32 compressedPatchStreamUpperPartSize,
		PxU32 compressedPatchStreamLowerPartSize,
		PxU32 totalContactManagers,
		const PartitionIndexData* partitionIndexData,
		const PartitionNodeData* partitionNodeData,
		const PxgSolverConstraintManagerConstants* constantData,
		PxU32 constantDataCount,
		PxU32 partitionIndexDataCount,
		const PxU32* partitionConstraintBatchStartIndices,
		const PxU32* partitionArticConstraintBatchStartIndices,
		const PxU32* partitionJointBatchCounts,
		const PxU32* partitionArtiJointBatchCounts,
		PxU32 nbPartitions,
		const PxU32* destroyedEdges,
		PxU32 nbDestroyedEdges,
		const PxU32* npIndexArray, PxU32 npIndexArraySize,
		PxU32 totalNumJoints,
		const PxU32* islandIds, const PxU32* nodeInteractionCounts, PxU32 nbNodes, const PxU32* islandStaticTouchCount, PxU32 nbIslands);
	// DMA the per-body solver data up to the device.
	void gpuMemDmaUpBodyData(PxPinnedArray<PxgSolverBodyData>& solverBodyDataPool,
		PxPinnedArray<PxgSolverTxIData>& solverTxIDataPool,
		const PxU32 numSolverBodies,
		const PxU32 totalNumRigidBatches, const PxU32 totalNumArticBatches,
		const PxU32 nbSlabs, const PxU32 nbStaticSlabs, const PxU32 maxNumStaticPartitions);
	void allocateSolverBodyBuffers(const PxU32 numSolverBodies,
		PxPinnedArray<PxNodeIndex>& islandNodeIndices,
		const PxU32 numActiveArticulations, const PxU32 maxArticulationLinks);
	// Main per-frame upload of constraint batches, islands and partition data.
	void gpuMemDMAUp(PxgPinnedHostLinearMemoryAllocator& hostAllocator, const PxgConstraintPrePrepData& data,
		const PxU32 numSolverBodies, PxgConstraintBatchHeader* constraintBatchHeaders,
		PxgIslandContext* islandContextPool, const PxU32 numIslands, const PxgPartitionData& partitionData,
		const PxU32 numConstraintBatchHeader, const PxU32 numStaticConstraintBatchHeader,
		const PxU32 numArticConstraintBatchHeader, const PxU32 numStaticArticulationBatchHeader,
		const PxU32 numArtiSelfConstraintBatchHeader, const PxgConstantData& cData,
		const PxU32 numContactBlocks, const PxU32 numFrictionBlocks,
		const PxU32 numArtiContacts, const PxU32 numArtiFrictions,
		const PxU32 totalCurrentEdges, const PxU32 totalPreviousEdges, const PxU32 numSlabs, const PxU32 maxNbPartitions,
		const bool enableStabilization, PxU8* cpuContactPatchStreamBase, PxU8* cpuContactStreamBase, PxU8* cpuForceStreamBase, PxsContactManagerOutputIterator& outputIterator,
		const PxU32 totalActiveBodyCount, const PxU32 activeBodyStartIndex, const PxU32 nbArticulations, Cm::UnAlignedSpatialVector* deferredZ,
		PxU32* articulationDirty, uint4* articulationSlabMask, Sc::ShapeInteraction** shapeInteractions, PxReal* restDistances,
		PxsTorsionalFrictionData* torsionalData,
		PxU32* artiStaticContactIndices, const PxU32 artiContactIndSize, PxU32* artiStaticJointIndices, PxU32 artiStaticJointSize,
		PxU32* artiStaticContactCounts, PxU32* artiStaticJointCounts,
		PxU32* artiSelfContactIndices, const PxU32 artiSelfContactIndSize, PxU32* artiSelfJointIndices, PxU32 artiSelfJointSize,
		PxU32* artiSelfContactCounts, PxU32* artiSelfJointCounts,
		PxU32* rigidStaticContactIndices, const PxU32 rigidContactIndSize, PxU32* rigidStaticJointIndices, const PxU32 rigidStaticJointSize,
		PxU32* rigidStaticContactCounts, PxU32* rigidStaticJointCounts, const PxReal lengthScale, bool hasForceThresholds);
	// Download the solver results (forces, threshold elements, optional constraint writeback).
	void gpuMemDMAbackSolverData(PxU8* forceBufferPool, PxU32 forceBufferOffset, PxU32 forceBufferUpperPartSize,
		PxU32 forceBufferLowerPartSize, Dy::ThresholdStreamElement* changedElems, bool hasForceThresholds, Dy::ConstraintWriteback* constraintWriteBack,
		const PxU32 writeBackSize, bool copyAllToHost, Dy::ErrorAccumulator*& contactError);
	void syncDmaBack(PxU32& nbChangedThresholdElements);
	void preIntegration(const PxU32 offset, const PxU32 nbSolverBodies, const PxReal dt, const PxVec3& gravity);
	// Constraint preparation kernel launches, one per constraint category
	void jointConstraintBlockPrePrepParallel(PxU32 nbConstraintBatches);
	void jointConstraintPrepareParallel(PxU32 nbJointBatches);
	void contactConstraintPrepareParallel(PxU32 nbContactBatches);
	void artiJointConstraintPrepare(PxU32 nbArtiJointBatches);
	void artiContactConstraintPrepare(PxU32 nbArtiContactBatches);
	//soft body/cloth/particle constraint prepare
	void nonRigidConstraintPrepare(PxU32 nbArticulations);
	// Multi-block parallel solve over all islands
	void solveContactMultiBlockParallel(PxgIslandContext* islandContexts, const PxU32 numIslands, const PxU32 maxPartitions,
		PxInt32ArrayPinned& constraintsPerPartition, PxInt32ArrayPinned& artiConstraintsPerPartition, const PxVec3& gravity,
		PxReal* posIterResidualSharedMem, PxU32 posIterResidualSharedMemSize, Dy::ErrorAccumulator* posIterError, PxPinnedArray<Dy::ErrorAccumulator>& artiContactPosIterError,
		PxPinnedArray<Dy::ErrorAccumulator>& perArticulationInternalError);
	void writeBackBlock(PxU32 a, PxgIslandContext& context);
	void solvePartitions(PxgIslandContext* islandContexts, PxInt32ArrayPinned& constraintsPerPartition, PxInt32ArrayPinned& artiConstraintsPerPartition,
		PxU32 islandIndex, bool doFriction, bool anyArticulationConstraints);
	void accumulatedForceThresholdStream(PxU32 maxNodes);
	void integrateCoreParallel(const PxU32 offset, const PxU32 nbSolverBodies);
	void getDataStreamBase(void*& contactStreamBase, void*& patchStreamBase, void*& forceAndIndexStreamBase);
};
}
#endif

View File

@@ -0,0 +1,149 @@
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
#ifndef PXG_D6_JOINT_H
#define PXG_D6_JOINT_H
#include "PxConstraintDesc.h"
#include "PxgD6JointLimit.h"
namespace physx
{
struct PxgD6Drive // equivalent of PxD6Drive
{
// IMPORTANT: values must stay in sync with PxD6Drive::Enum on the CPU side.
enum Enum
{
eX = 0, //!< drive along the X-axis
eY = 1, //!< drive along the Y-axis
eZ = 2, //!< drive along the Z-axis
eSWING = 3, //!< drive of displacement from the X-axis
eTWIST = 4, //!< drive of the displacement around the X-axis
eSLERP = 5, //!< drive of all three angular degrees along a SLERP-path
eSWING1 = 6, //!< drive of the swing1 angle (alternative to eSWING/eSLERP)
eSWING2 = 7, //!< drive of the swing2 angle (alternative to eSWING/eSLERP)
eCOUNT //!< number of drive entries (= 8)
};
};
struct PxgD6Motion // equivalent of PxD6Motion; values presumably must match the CPU enum - verify
{
enum Enum
{
eLOCKED, //!< The DOF is locked, it does not allow relative motion.
eLIMITED, //!< The DOF is limited, it only allows motion within a specific range.
eFREE, //!< The DOF is free and has its full range of motion.
eFORCE_DWORD = 0x7fffffff //!< forces the underlying type to 32 bits
};
};
struct PxgD6Axis // equivalent of PxD6Axis: the six degrees of freedom of a D6 joint
{
enum Enum
{
eX = 0, //!< motion along the X axis
eY = 1, //!< motion along the Y axis
eZ = 2, //!< motion along the Z axis
eTWIST = 3, //!< motion around the X axis
eSWING1 = 4, //!< motion around the Y axis
eSWING2 = 5, //!< motion around the Z axis
eCOUNT = 6
};
};
struct PxgD6JointDriveFlag // flags controlling drive behavior, see PxgD6JointDrive::flags
{
PX_CUDA_CALLABLE PxgD6JointDriveFlag(){}
enum Enum
{
// IMPORTANT: the enum values need to match the ones in PxD6JointDriveFlag. Unfortunately, the GPU
// version just copy pasted all the D6 logic. Testing with a compile time assert would
// create a bit of a mess with our code hierarchy (on CPU, joints are a concept known
// to the PhysXExtensions library only)
eACCELERATION = (1 << 0), //!< drive spring is for the acceleration at the joint (rather than the force)
eOUTPUT_FORCE = (1 << 1) // see PxD6JointDriveFlag::eOUTPUT_FORCE
};
};
typedef PxFlags<PxgD6JointDriveFlag::Enum, PxU32> PxgD6JointDriveFlags;
// GPU mirror of PxSpring: a simple stiffness/damping pair used by drives and soft limits.
class PxgSpring
{
public:
PxReal stiffness; //!< the spring strength of the drive: that is, the force proportional to the position error
PxReal damping; //!< the damping strength of the drive: that is, the force proportional to the velocity error
PX_CUDA_CALLABLE PxgSpring(PxReal stiffness_, PxReal damping_): stiffness(stiffness_), damping(damping_) {}
};
class PxgD6JointDrive : public PxgSpring
{
public:
PxReal forceLimit; //!< the force limit of the drive - may be an impulse or a force depending on PxConstraintFlag::eDRIVE_LIMITS_ARE_FORCES
PxgD6JointDriveFlags flags; //!< the joint drive flags
/**
\brief default constructor for PxD6JointDrive.
*/
PX_CUDA_CALLABLE PxgD6JointDrive(): PxgSpring(0,0), forceLimit(PX_MAX_F32), flags(0) {}
/**
\brief constructor a PxD6JointDrive.
\param[in] driveStiffness the stiffness of the drive spring.
\param[in] driveDamping the damping of the drive spring
\param[in] driveForceLimit the maximum impulse or force that can be exerted by the drive
\param[in] isAcceleration whether the drive is an acceleration drive or a force drive
*/
PX_CUDA_CALLABLE PxgD6JointDrive(PxReal driveStiffness, PxReal driveDamping, PxReal driveForceLimit, bool isAcceleration = false)
: PxgSpring(driveStiffness, driveDamping)
, forceLimit(driveForceLimit)
, flags(isAcceleration?(PxU32)PxgD6JointDriveFlag::eACCELERATION : 0)
{}
/**
\brief returns true if the drive is valid
*/
bool isValid() const
{
return PxIsFinite(stiffness) && stiffness>=0 &&
PxIsFinite(damping) && damping >=0 &&
PxIsFinite(forceLimit) && forceLimit >=0;
}
};
}
#endif

View File

@@ -0,0 +1,117 @@
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
#ifndef PXG_D6_JOINT_DATA_H
#define PXG_D6_JOINT_DATA_H
#include "PxConstraintDesc.h"
#include "PxgD6JointLimit.h"
#include "AlignedTransform.h"
#include "PxgSolverConstraintDesc.h"
namespace physx
{
// Common per-joint data shared by all GPU joint types.
struct PxgJointData
{
PxgConstraintInvMassScale invMassScale; // inverse-mass scales applied to the two joined bodies
PxTransform32 c2b[2]; // constraint-frame-to-body transforms for body0 and body1
};
// GPU-side D6 joint data. The field layout must match the CPU-side D6JointData (see note below),
// so do not reorder or insert members.
struct PxgD6JointData : public PxgJointData
{
public:
PX_CUDA_CALLABLE PxgD6JointData(){}
static constexpr PxU32 sDriveEntryCapacity = 6;
PxgD6Motion::Enum motion[6]; // one entry per DOF - presumably indexed by PxgD6Axis::Enum (eCOUNT == 6), verify
PxgJointLinearLimit distanceLimit;
PxgJointLinearLimitPair linearLimitX;
PxgJointLinearLimitPair linearLimitY;
PxgJointLinearLimitPair linearLimitZ;
PxgJointAngularLimitPair twistLimit;
PxgJointLimitCone swingLimit;
PxgJointLimitPyramid pyramidSwingLimit;
PxgD6JointDrive drive[sDriveEntryCapacity]; // drive entries; capacity is 6, not PxgD6Drive::eCOUNT (8) - entries presumably stored compactly, verify against prep code
PxTransform drivePosition; // drive target pose
PxVec3 driveLinearVelocity; // drive target linear velocity
PxVec3 driveAngularVelocity; // drive target angular velocity
// derived quantities
PxU32 locked; // bitmap of locked DOFs
PxU32 limited; // bitmap of limited DOFs
PxU32 driving; // bitmap of active drives (implies driven DOFs not locked)
PxReal distanceMinDist; // distance limit minimum distance to get a good direction
// PT: the PxD6Motion values are now shared for both kind of linear limits, so we need
// an extra bool to know which one(s) should be actually used.
bool mUseDistanceLimit;
bool mUseNewLinearLimits;
// PT: the swing limits can now be a cone or a pyramid, so we need
// an extra bool to know which one(s) should be actually used.
bool mUseConeLimit;
bool mUsePyramidLimits;
PxU8 angularDriveConfig; // stores the angular drive config (PxD6AngularDriveConfig::Enum)
//Please don't add fields above this line since the layout must match D6JointData
// forestall compiler complaints about not being able to generate a constructor
private:
PxgD6JointData(const PxgJointLinearLimit& distance,
const PxgJointLinearLimitPair& linearX,
const PxgJointLinearLimitPair& linearY,
const PxgJointLinearLimitPair& linearZ,
const PxgJointAngularLimitPair& twist,
const PxgJointLimitCone& swing,
const PxgJointLimitPyramid& pyramid):
distanceLimit(distance),
linearLimitX(linearX),
linearLimitY(linearY),
linearLimitZ(linearZ),
twistLimit(twist),
swingLimit(swing),
pyramidSwingLimit(pyramid),
mUseDistanceLimit(false),
mUseNewLinearLimits(false),
mUseConeLimit(false),
mUsePyramidLimits(false),
angularDriveConfig(0)
{}
};
PX_COMPILE_TIME_ASSERT(sizeof(PxgD6JointData) <= 512);
}
#endif

View File

@@ -0,0 +1,340 @@
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
#ifndef PXG_D6_JOINT_LIMIT_H
#define PXG_D6_JOINT_LIMIT_H
#include "common/PxTolerancesScale.h"
#include "PxgD6Joint.h"
namespace physx
{
// PT: why aren't these shared with the CPU ?
// GPU mirror of PxJointLimitParameters: restitution/bounce plus an optional soft-limit spring.
class PxgJointLimitParameters
{
public:
	PxReal restitution;     //!< limit restitution in [0,1]
	PxReal bounceThreshold; //!< minimum impact velocity for the limit to bounce
	PxReal stiffness;       //!< soft-limit spring stiffness (0 = hard limit)
	PxReal damping;         //!< soft-limit spring damping (0 = hard limit)

	/// Default-constructs a hard limit with all parameters zeroed.
	PX_CUDA_CALLABLE PxgJointLimitParameters()
	: restitution(0)
	, bounceThreshold(0)
	, stiffness(0)
	, damping(0)
	{
	}

	/**
	\brief Returns true if the current settings are valid.
	\return true if the current settings are valid
	*/
	PX_INLINE bool isValid() const
	{
		const bool restitutionOk = PxIsFinite(restitution) && restitution >= 0 && restitution <= 1;
		const bool springOk = PxIsFinite(stiffness) && stiffness >= 0 && PxIsFinite(damping) && damping >= 0;
		const bool bounceOk = PxIsFinite(bounceThreshold) && bounceThreshold >= 0;
		return restitutionOk && springOk && bounceOk;
	}

	/// A limit is "soft" when either spring coefficient is positive.
	PX_CUDA_CALLABLE PX_INLINE bool isSoft() const
	{
		return stiffness > 0 || damping > 0;
	}
};
// GPU mirror of PxJointLinearLimit: a single symmetric linear extent.
class PxgJointLinearLimit : public PxgJointLimitParameters
{
public:
	PxReal value; //!< the extent of the limit

	/// Constructs a hard linear limit with the given extent.
	PX_CUDA_CALLABLE PxgJointLinearLimit(PxReal extent) : value(extent)
	{
	}

	/// Constructs a soft linear limit: the spring's coefficients become the limit's spring.
	PX_CUDA_CALLABLE PxgJointLinearLimit(PxReal extent, const PxgSpring& spring) : value(extent)
	{
		stiffness = spring.stiffness;
		damping = spring.damping;
	}

	PX_CUDA_CALLABLE PxgJointLinearLimit() {}

	/// Valid when the base parameters are valid and the extent is finite and strictly positive.
	PX_INLINE bool isValid() const
	{
		if(!PxgJointLimitParameters::isValid())
			return false;
		return PxIsFinite(value) && value > 0;
	}
};
// GPU mirror of PxJointLinearLimitPair: an asymmetric [lower, upper] linear range.
class PxgJointLinearLimitPair : public PxgJointLimitParameters
{
public:
	PX_CUDA_CALLABLE PxgJointLinearLimitPair() {}

	/**
	\brief the range of the limit. The upper limit must be no lower than the
	lower limit, and if they are equal the limited degree of freedom will be treated as locked.
	<b>Range:</b> See the joint on which the limit is used for details<br>
	<b>Default:</b> lower = -PX_MAX_F32/3, upper = PX_MAX_F32/3
	*/
	PxReal upper, lower;

	/**
	\brief Construct a linear hard limit pair. The lower distance value must be less than the upper distance value.
	\param[in] scale A PxTolerancesScale struct. Should be the same as used when creating the PxPhysics object.
	\param[in] lowerLimit The lower distance of the limit
	\param[in] upperLimit The upper distance of the limit
	\see PxJointLimitParameters PxTolerancesScale
	*/
	PX_CUDA_CALLABLE PxgJointLinearLimitPair(const PxTolerancesScale& scale, PxReal lowerLimit, PxReal upperLimit) :
		upper(upperLimit),
		lower(lowerLimit)
	{
		// hard limit: bounce threshold is derived from the scene's length scale
		bounceThreshold = 2.0f*scale.length;
	}

	/**
	\brief construct a linear soft limit pair
	\param[in] lowerLimit The lower distance of the limit
	\param[in] upperLimit The upper distance of the limit
	\param[in] spring The stiffness and damping parameters of the limit spring
	\see PxJointLimitParameters PxTolerancesScale
	*/
	PX_CUDA_CALLABLE PxgJointLinearLimitPair(PxReal lowerLimit, PxReal upperLimit, const PxgSpring& spring) :
		upper(upperLimit),
		lower(lowerLimit)
	{
		stiffness = spring.stiffness;
		damping = spring.damping;
	}

	/**
	\brief Returns true if the limit is valid.
	\return true if the current settings are valid
	*/
	PX_INLINE bool isValid() const
	{
		if(!PxgJointLimitParameters::isValid())
			return false;
		const bool boundsOk = PxIsFinite(upper) && PxIsFinite(lower) && upper >= lower;
		return boundsOk && PxIsFinite(upper - lower);
	}
};
// GPU mirror of PxJointAngularLimitPair: an asymmetric [lower, upper] angular range (radians).
class PxgJointAngularLimitPair : public PxgJointLimitParameters
{
public:
PxReal upper, lower; // angular range; upper must be >= lower (see isValid)
// hard angular limit; bounceThreshold defaults to 0.5 (rad/s presumably - verify)
PX_CUDA_CALLABLE PxgJointAngularLimitPair(PxReal lowerLimit, PxReal upperLimit)
: upper(upperLimit)
, lower(lowerLimit)
{
bounceThreshold = 0.5f;
}
// soft angular limit driven by the given spring
PX_CUDA_CALLABLE PxgJointAngularLimitPair(PxReal lowerLimit, PxReal upperLimit, const PxgSpring& spring)
: upper(upperLimit)
, lower(lowerLimit)
{
stiffness = spring.stiffness;
damping = spring.damping;
}
PX_CUDA_CALLABLE PxgJointAngularLimitPair(){}
/**
\brief Returns true if the limit is valid.
\return true if the current settings are valid
*/
PX_INLINE bool isValid() const
{
return PxgJointLimitParameters::isValid() &&
PxIsFinite(upper) && PxIsFinite(lower) && upper >= lower;
}
};
// GPU mirror of PxJointLimitCone: an elliptical swing cone with per-axis half-angles (radians).
class PxgJointLimitCone : public PxgJointLimitParameters
{
public:
PxReal yAngle; // maximum angle from the X axis around Y; must be in (0, PI) (see isValid)
PxReal zAngle; // maximum angle from the X axis around Z; must be in (0, PI) (see isValid)
// hard cone limit; bounceThreshold defaults to 0.5 (rad/s presumably - verify)
PX_CUDA_CALLABLE PxgJointLimitCone(PxReal yLimitAngle, PxReal zLimitAngle):
yAngle(yLimitAngle),
zAngle(zLimitAngle)
{
bounceThreshold = 0.5f;
}
// soft cone limit driven by the given spring
PX_CUDA_CALLABLE PxgJointLimitCone(PxReal yLimitAngle, PxReal zLimitAngle, const PxgSpring& spring):
yAngle(yLimitAngle),
zAngle(zLimitAngle)
{
stiffness = spring.stiffness;
damping = spring.damping;
}
PX_CUDA_CALLABLE PxgJointLimitCone(){}
/**
\brief Returns true if the limit is valid.
\return true if the current settings are valid
*/
PX_INLINE bool isValid() const
{
return PxgJointLimitParameters::isValid() &&
PxIsFinite(yAngle) && yAngle>0 && yAngle<PxPi &&
PxIsFinite(zAngle) && zAngle>0 && zAngle<PxPi;
}
};
class PxgJointLimitPyramid : public PxgJointLimitParameters
{
public:
/**
\brief The minimum angle from the Y-axis of the constraint frame.
<b>Unit:</b> Angular: Radians
<b>Range:</b> Angular: (-PI,PI)<br>
<b>Default:</b> -PI/2
*/
PxReal yAngleMin;
/**
\brief The maximum angle from the Y-axis of the constraint frame.
<b>Unit:</b> Angular: Radians
<b>Range:</b> Angular: (-PI,PI)<br>
<b>Default:</b> PI/2
*/
PxReal yAngleMax;
/**
\brief The minimum angle from the Z-axis of the constraint frame.
<b>Unit:</b> Angular: Radians
<b>Range:</b> Angular: (-PI,PI)<br>
<b>Default:</b> -PI/2
*/
PxReal zAngleMin;
/**
\brief The maximum angle from the Z-axis of the constraint frame.
<b>Unit:</b> Angular: Radians
<b>Range:</b> Angular: (-PI,PI)<br>
<b>Default:</b> PI/2
*/
PxReal zAngleMax;

/// Default constructor; leaves all members uninitialized.
PX_CUDA_CALLABLE PxgJointLimitPyramid() {}

/**
\brief Construct a pyramid hard limit.
\param[in] yLimitAngleMin The minimum limit angle from the Y-axis of the constraint frame
\param[in] yLimitAngleMax The maximum limit angle from the Y-axis of the constraint frame
\param[in] zLimitAngleMin The minimum limit angle from the Z-axis of the constraint frame
\param[in] zLimitAngleMax The maximum limit angle from the Z-axis of the constraint frame
\see PxJointLimitParameters
*/
PX_CUDA_CALLABLE PxgJointLimitPyramid(PxReal yLimitAngleMin, PxReal yLimitAngleMax, PxReal zLimitAngleMin, PxReal zLimitAngleMax)
{
	yAngleMin = yLimitAngleMin;
	yAngleMax = yLimitAngleMax;
	zAngleMin = zLimitAngleMin;
	zAngleMax = zLimitAngleMax;
	bounceThreshold = 0.5f;	// hard limit: restitution kicks in above this impact speed
}

/**
\brief Construct a pyramid soft limit driven by a spring.
\param[in] yLimitAngleMin The minimum limit angle from the Y-axis of the constraint frame
\param[in] yLimitAngleMax The maximum limit angle from the Y-axis of the constraint frame
\param[in] zLimitAngleMin The minimum limit angle from the Z-axis of the constraint frame
\param[in] zLimitAngleMax The maximum limit angle from the Z-axis of the constraint frame
\param[in] spring The stiffness and damping of the limit spring
\see PxJointLimitParameters
*/
PX_CUDA_CALLABLE PxgJointLimitPyramid(PxReal yLimitAngleMin, PxReal yLimitAngleMax, PxReal zLimitAngleMin, PxReal zLimitAngleMax, const PxgSpring& spring)
{
	yAngleMin = yLimitAngleMin;
	yAngleMax = yLimitAngleMax;
	zAngleMin = zLimitAngleMin;
	zAngleMax = zLimitAngleMax;
	stiffness = spring.stiffness;
	damping = spring.damping;
}

/**
\brief Returns true if the limit is valid.
\return true if the current settings are valid
*/
PX_INLINE bool isValid() const
{
	// All four angles must be finite and strictly inside (-PI, PI),
	// and each min/max pair must describe a non-empty interval.
	const bool yOk = PxIsFinite(yAngleMin) && (yAngleMin > -PxPi) && (yAngleMin < PxPi) &&
					 PxIsFinite(yAngleMax) && (yAngleMax > -PxPi) && (yAngleMax < PxPi);
	const bool zOk = PxIsFinite(zAngleMin) && (zAngleMin > -PxPi) && (zAngleMin < PxPi) &&
					 PxIsFinite(zAngleMax) && (zAngleMax > -PxPi) && (zAngleMax < PxPi);
	const bool ordered = (yAngleMax >= yAngleMin) && (zAngleMax >= zAngleMin);
	return PxgJointLimitParameters::isValid() && yOk && zOk && ordered;
}
};
}
#endif

View File

@@ -0,0 +1,35 @@
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
#ifndef PXG_DYNAMICS_CONFIGURATION_H
#define PXG_DYNAMICS_CONFIGURATION_H
#define PXG_BATCH_SIZE 32u
#endif

View File

@@ -0,0 +1,64 @@
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
#ifndef PXG_DYNAMICS_CONTEXT_H
#define PXG_DYNAMICS_CONTEXT_H
#include "PxgContext.h"
namespace physx
{
namespace Cm
{
class FlushPool;
}
class PxBaseTask;
class PxsKernelWranglerManager;
/**
\brief A class to represent a GPU dynamics context for the GPU rigid body solver
*/
// GPU rigid-body dynamics context implementing the PGS solver path (see getSolverType()).
class PxgDynamicsContext : public PxgGpuContext
{
PX_NOCOPY(PxgDynamicsContext)
public:
// Constructs the context with the task/flush infrastructure, CUDA kernel wrangler and memory
// configuration it needs; the many flags mirror the scene's dynamics settings — see the
// definition in the corresponding .cpp for how each is consumed.
PxgDynamicsContext(Cm::FlushPool& flushPool, PxsKernelWranglerManager* gpuKernelWrangler, PxCudaContextManager* cudaContextManager,
const PxGpuDynamicsMemoryConfig& config, IG::SimpleIslandManager& islandManager, PxU32 maxNumPartitions, PxU32 maxNumStaticPartitions,
bool enableStabilization, bool useEnhancedDeterminism, PxReal maxBiasCoefficient, PxvSimStats& simStats,
PxgHeapMemoryAllocatorManager* heapMemoryManager, bool frictionEveryIteration, PxReal lengthScale, bool enableDirectGPUAPI, PxU64 contextID, bool isResidualReportingEnabled);
// Tears down the context (virtual: may be overridden by derived solver contexts).
virtual void destroy();
// This context always runs the PGS (projected Gauss-Seidel) solver.
virtual PxSolverType::Enum getSolverType() const { return PxSolverType::ePGS; }
};
}
#endif

View File

@@ -0,0 +1,51 @@
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
#ifndef PXG_EDGE_TYPE_H
#define PXG_EDGE_TYPE_H
namespace physx
{
//This is the same as IG::Edge::EdgeType, but we have more enum type so we can represent articulation
//contacts and joints
struct PxgEdgeType
{
enum Enum
{
eCONTACT_MANAGER = 0,		// rigid-rigid contact edge
eCONSTRAINT,				// rigid-rigid joint/constraint edge
eARTICULATION_CONTACT,		// contact edge involving an articulation
eARTICULATION_CONSTRAINT,	// joint/constraint edge involving an articulation
eEDGE_TYPE_COUNT			// number of edge types (not a valid type)
};
};
}
#endif

View File

@@ -0,0 +1,126 @@
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
#include "foundation/PxSimpleTypes.h"
#include "foundation/PxVec3.h"
#include "PxvConfig.h"
#ifndef PXG_FRICTION_PATCH_H
#define PXG_FRICTION_PATCH_H
namespace physx
{
// A single friction patch: contact normals and up to two friction anchors expressed
// in each body's space, plus bookkeeping (anchor count, broken flag, contact IDs).
// Layout is fixed — asserted to 112 bytes below; do not reorder members.
struct PxgFrictionPatch
{
float4 body0Normal;
float4 body1Normal;
float4 body0Anchors[2];
float4 body1Anchors[2];
PxU32 anchorCount;
PxU32 broken;
PxU32 contactID[2];

// Explicit member-wise copy (float4 has no compiler-generated device-friendly
// assignment in all build configurations, so spell the copy out).
PX_CUDA_CALLABLE PX_FORCE_INLINE void operator = (const PxgFrictionPatch& other)
{
	body0Normal = other.body0Normal;
	body1Normal = other.body1Normal;
	for (PxU32 i = 0; i < 2; ++i)
	{
		body0Anchors[i] = other.body0Anchors[i];
		body1Anchors[i] = other.body1Anchors[i];
		contactID[i] = other.contactID[i];
	}
	anchorCount = other.anchorCount;
	broken = other.broken;
}
};
PX_COMPILE_TIME_ASSERT(sizeof(PxgFrictionPatch)==112);
// Structure-of-arrays friction patch for a block of 32 constraints (one lane per thread in a
// warp); over-aligned so each 32-wide member array starts on its own cache/sector boundary,
// giving coalesced per-lane access on the GPU.
struct PxgBlockFrictionPatch
{
PX_ALIGN(256, float4 body0Normal[32]);
PX_ALIGN(256, float4 body1Normal[32]);
PX_ALIGN(128, PxU32 anchorCount[32]);
PX_ALIGN(128, PxU32 broken[32]);
PX_ALIGN(128, PxU32 contactID[2][32]);
PX_ALIGN(256, float4 anchorPoints[2][32]);
PX_ALIGN(128, PxU32 patchIndex[32]);
};
// Per-block (32-lane) friction anchor positions in each body's frame; split out from
// PxgBlockFrictionPatch. Trailing comments give running offset / member size in bytes.
struct PxgBlockFrictionAnchorPatch
{
PX_ALIGN(256, float4 body0Anchors[2][32]); //1024 1024
PX_ALIGN(256, float4 body1Anchors[2][32]); //2048 1024
};
// Non-blocked (single-constraint) counterpart of PxgBlockFrictionAnchorPatch:
// up to two friction anchors per body.
struct PxgFrictionAnchorPatch
{
float4 body0Anchors[2];
float4 body1Anchors[2];
};
// Compact friction-patch view (anchor points, accumulated impulses, count) —
// presumably the format exposed to users via the direct-GPU API; confirm at call sites.
struct PxgFrictionPatchGPU
{
static const PxU32 MAX_ANCHORS = 2; //!< Patch friction anchor max count
PxVec3 points[MAX_ANCHORS]; //!< Patch friction anchors points
PxVec3 impulses[MAX_ANCHORS]; //!< Patch friction impulses at anchors
PxU32 anchors; //!< Patch friction anchor count
};
/**
This class is used for friction correlation using the block friction format. The idea is simple - we have an array of these block friction index objects.
These objects contain a pointer to the block patch and the index that this particular constraint is in that structure.
This allows us to allocate individual block friction patches per-block constraint and then index into them.
Advantage - we can use colaesced memory access patterns for the friction patches, broken flags etc. Can remove the need for multiple pointers to friction patches in constraint descs.
Disadvantage - one extra level of indirection to access previous friction patches. No guarantees that accessing previous friction patches won't diverge (in practice,
they should be similar but they could still diverge if new constraints are introduced that change the layout of constraints within a given partition).
*/
struct PxgBlockFrictionIndex
{
	// Packed value: upper bits hold the block-friction-patch index, the low 5 bits
	// hold this constraint's lane (0..31) within that 32-wide block.
	PxU64 mPatchIndex_threadIdxLow;

	/// \return the index of the block friction patch this constraint belongs to.
	PX_CUDA_CALLABLE PxU64 getPatchIndex() const
	{
		return mPatchIndex_threadIdxLow >> 5ull;
	}

	/// \return the lane (thread index in warp, 0..31) within the block patch.
	PX_CUDA_CALLABLE PxU32 getThreadIdx() const
	{
		return PxU32(mPatchIndex_threadIdxLow & 31);
	}

	/// Pack a patch index and a warp lane into the single 64-bit value.
	PX_CUDA_CALLABLE void createPatchIndex(const PxU32 patchIndex, const PxU32 threadIndexInWarp)
	{
		const PxU64 shiftedPatch = PxU64(patchIndex) << 5ull;
		mPatchIndex_threadIdxLow = shiftedPatch | threadIndexInWarp;
	}
};
}
#endif

View File

@@ -0,0 +1,66 @@
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
#ifndef PXG_ISLAND_CONTEXT_H
#define PXG_ISLAND_CONTEXT_H
#include "foundation/PxArray.h"
namespace physx
{
// Per-island slice descriptors for the GPU solver: index ranges into the shared body,
// constraint-desc and batch arrays plus iteration counts. Field meanings below are inferred
// from names — confirm against the code that populates this struct.
struct PxgIslandContext
{
PxU32 mBodyStartIndex;			// first solver body belonging to this island
PxU32 mBodyCount;				// number of rigid bodies in the island
PxU32 mArticulationCount;		// number of articulations in the island
PxU32 mDescStartIndex;			// first constraint descriptor of this island
PxU32 mDescCount;				// number of constraint descriptors
PxI32 mNumPositionIterations;	// solver position iterations for this island
PxI32 mNumVelocityIterations;	// solver velocity iterations for this island
PxU32 mStartPartitionIndex;		// first constraint partition of this island
PxU32 mNumPartitions;			// number of constraint partitions
PxU32 mBatchStartIndex;			// first rigid-constraint batch
PxU32 mBatchCount;				// number of rigid-constraint batches
PxU32 mArtiBatchStartIndex;		// first articulation-constraint batch
PxU32 mArtiBatchCount;			// number of articulation-constraint batches
PxU32 mStaticArtiBatchCount;	// articulation-vs-static batches
PxU32 mSelfArtiBatchCount;		// articulation self-constraint batches
PxU32 mStaticRigidBatchCount;	// rigid-vs-static batches
PxReal mBiasCoefficient;		// bias (Baumgarte-style) coefficient used by this island's solve
};
}
#endif

View File

@@ -0,0 +1,60 @@
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
#ifndef PXG_PARTITION_NODE_H
#define PXG_PARTITION_NODE_H
#include "PxNodeIndex.h"
namespace physx
{
// Locates one (edge, patch) entry inside the incremental partitioning structures.
struct PartitionIndexData
{
PxU16 mPartitionIndex; //! The current partition this edge is in. Used to find the edge efficiently. PxU8 is probably too small (256 partitions max) but PxU16 should be more than enough
PxU8 mPatchIndex; //! The patch index for this partition edge. There may be multiple entries for a given edge if there are multiple patches.
PxU8 mCType; //! The type of constraint this is (PxgEdgeType)
PxU32 mPartitionEntryIndex; //! index of partition edges for this partition
};
// PT: stored in incremental partition code's mPartitionNodeArray,
// indexed by a partition edge's unique index.
// Per-edge node/linkage record for the incremental partitioner (indexed by the
// partition edge's unique index; see comments above each member for provenance).
struct PartitionNodeData
{
// PT: copies of PartitionEdge' node indices (the nodes connected by this edge)
// - created in PxgIncrementalPartition::addEdge_Stage1
PxNodeIndex mNodeIndex0;
PxNodeIndex mNodeIndex1;
// PT: links to next edge unique indices containing the same nodes
// - computed in PxgIncrementalPartition::addEdge_Stage2 => PxgIncrementalPartition::addEdgeInternal
// - used in constraintContactBlockPrePrepLaunch / constraint1DBlockPrePrepLaunch
// - unclear what we need this for
PxU32 mNextIndex[2];
};
}
#endif

View File

@@ -0,0 +1,40 @@
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
#ifndef PXG_SOLVER_H
#define PXG_SOLVER_H
namespace physx
{
//this is needed to force PhysXSolverGpu linkage as Static Library!
void createPxgSolver();
}
#endif

View File

@@ -0,0 +1,202 @@
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
#ifndef PXG_SOLVER_BODY_H
#define PXG_SOLVER_BODY_H
#include "PxvConfig.h"
#include "foundation/PxSimpleTypes.h"
#include "foundation/PxVec3.h"
#include "foundation/PxMat33.h"
#include "foundation/PxTransform.h"
#if !PX_CUDA_COMPILER
#include <vector_types.h>
#endif
#include "AlignedMat33.h"
#include "AlignedTransform.h"
#include "PxNodeIndex.h"
#include "PxSpatialMatrix.h"
namespace physx
{
class PxsRigidBody;
struct PxgSolverBody;
class PxgArticulation;
// Per-body transform/inertia data consumed by the solver; trailing numbers are byte sizes.
struct PxgSolverTxIData
{
PxTransform deltaBody2World; // 64 body delta transform
PxMat33 sqrtInvInertia; // 36 inverse inertia in world space
};
// Solver-prep snapshot of a body's velocities, inverse mass and pose.
// The CPU (#if !PX_CUDA_COMPILER) and GPU (#else) member sets describe the SAME 32-byte
// prefix layout: initialAngVel+penBiasClamp pack into one float4, initialLinVel+invMass
// into the next — keep both branches in sync if anything changes.
struct PxgSolverBodyPrepData
{
#if !PX_CUDA_COMPILER
PX_ALIGN(16, PxVec3 initialAngVel); // 12 initial ang vel
PxReal penBiasClamp; // 16 the penetration bias clamp
PxVec3 initialLinVel; // 28 initial lin vel
PxReal invMass; // 32 inverse mass
#else
float4 initialAngVelXYZ_penBiasClamp;
float4 initialLinVelXYZ_invMassW;
#endif
PxAlignedTransform body2World;
#if !PX_CUDA_COMPILER
// Projects the body's initial velocities onto the given linear/angular directions
// (normal velocity along a constraint axis).
PX_FORCE_INLINE PxReal projectVelocity(const PxVec3& lin, const PxVec3& ang) const
{
return initialLinVel.dot(lin) + initialAngVel.dot(ang);
}
#else
// Same projection as the CPU branch, unpacking the velocities from the packed float4s first.
PX_CUDA_CALLABLE PX_FORCE_INLINE PxReal projectVelocity(const PxVec3& lin, const PxVec3& ang) const
{
//return initialLinVel.dot(lin) + initialAngVel.dot(ang);
PxVec3 initialLinVel(initialLinVelXYZ_invMassW.x, initialLinVelXYZ_invMassW.y, initialLinVelXYZ_invMassW.z);
PxVec3 initialAngVel(initialAngVelXYZ_penBiasClamp.x, initialAngVelXYZ_penBiasClamp.y, initialAngVelXYZ_penBiasClamp.z);
return initialLinVel.dot(lin) + initialAngVel.dot(ang);
}
#endif
};
#if PX_VC
#pragma warning(push)
#pragma warning(disable : 4324)
#endif
// Extends the prep data with island/threshold bookkeeping; overall size must stay
// 16-byte aligned (asserted just below the struct). Trailing numbers are byte offsets.
struct PxgSolverBodyData : public PxgSolverBodyPrepData
{
PxNodeIndex islandNodeIndex; // 40
PxReal reportThreshold; // 44 contact force threshold
PxReal maxImpulse; // 48
PxU32 flags; // 52 hasSpeculativeCCD etc.
PxReal offsetSlop; // slop tolerance — presumably mirrors PxRigidBody's contact offset slop; confirm at fill site
};
#if PX_VC
#pragma warning(pop)
#endif
PX_COMPILE_TIME_ASSERT((sizeof(PxgSolverBodyData)& 0xf) == 0);
// A solver "extended body": either a rigid body or an articulation link, discriminated
// by linkIndex (see comment below) so constraint code can treat both uniformly.
class PxgSolverExtBody
{
public:
union
{
const PxgArticulation* articulation;	// valid when linkIndex != 0xffff
const PxgSolverBodyData* body;			// valid when linkIndex == 0xffff
};
//if linkIndex is 0xffff, the solver body is rigid body, otherwise, it is articulation
PxU16 linkIndex;
PxU16 isKinematic;		// non-zero when the body is kinematic
PxU32 bodyIndex;		// index into the solver body arrays
PxU32 islandNodeIndex;	// island-graph node of this body
};
// Richer extended-body record carrying the spatial response matrix and current velocity;
// trailing numbers are running byte offsets. linkIndex discriminates rigid (0xffff) vs
// articulation link, matching PxgSolverExtBody.
struct PxgSolverExtBody2
{
PxSpatialMatrix mSpatialResponse; //144
Cm::UnAlignedSpatialVector velocity; //168
PxTransform body2World; //196
PxReal penBiasClamp; //200
PxReal maxImpulse; //204
PxU16 linkIndex; //206
PxU16 isKinematic; //208
PxU32 bodyIndex; //212
PxNodeIndex islandNodeIndex; //216
PxReal cfm; //220
PxReal offsetSlop; //224
};
//we need to DMA back the sleep data to CPU. PxgBodySim has the same information. However, PxgBodySim is too
//big to dma back.
// Minimal sleep state DMA'd back to the CPU after the solve (PxgBodySim holds the same
// fields but is too large to copy back — see the comment above this struct).
struct PxgSolverBodySleepData
{
PxReal wakeCounter;		// remaining time before the body may go to sleep
PxU32 internalFlags;	// internal body flags — semantics defined where this is written; confirm there
};
#if PX_VC
#pragma warning(push)
#pragma warning (disable : 4201)
#endif
// Post-solve body velocities. CPU and GPU branches share the same 32-byte layout
// (PxVec3 + pad == float4); asserted to 32 bytes below — keep branches in sync.
struct PxgSolverBody
{
#if !PX_CUDA_COMPILER
PX_ALIGN(16, PxVec3 linearVelocity); // post-solver linear velocity in world space
PxU32 pad;
PxVec3 angularVelocity; // post-solver angular velocity in world space
PxU32 pad2;
#else
float4 linearVelocity;
float4 angularVelocity;
#endif
};
#if PX_VC
#pragma warning(pop)
#endif
PX_COMPILE_TIME_ASSERT(sizeof(PxgSolverBody) == 32);
#if PX_VC
#pragma warning(push)
#pragma warning (disable : 4201)
#endif
// Post-solve body state for the TGS solver: velocities plus accumulated delta motion.
// CPU and GPU branches share the same 48-byte layout (4 x PxVec3 == 3 x float4);
// keep both branches in sync and keep the size assert below passing.
struct PxgTGSSolverBody
{
#if !PX_CUDA_COMPILER
PX_ALIGN(16, PxVec3 linearVelocity); // 12 post-solver linear velocity in world space
PxVec3 angularVelocity; // 24 post-solver angular velocity in world space
PxVec3 linearDelta; // 36 linear delta motion in world space
PxVec3 angularDelta; // 48 angular delta motion in world space
#else
float4 linearVelocityXYZ_angX;
float4 angularVelocityYZ_linDeltaXY;
float4 linDeltaZ_angDeltaXYZ;
#endif
};
#if PX_VC
#pragma warning(pop)
#endif
// Fixed copy-paste bug: this assert previously re-checked sizeof(PxgSolverBody) == 32
// (duplicating the assert after PxgSolverBody above) and never validated this struct.
// Both branches of PxgTGSSolverBody are 48 bytes.
PX_COMPILE_TIME_ASSERT(sizeof(PxgTGSSolverBody) == 48);
// Single remapped body index — wraps the PxU32 to give the reference array a named type.
struct PxgSolverReferences
{
PxU32 mRemappedBodyIndex;
};
}
#endif

View File

@@ -0,0 +1,159 @@
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
#ifndef PXG_SOLVER_CONSTRAINT_1D_H
#define PXG_SOLVER_CONSTRAINT_1D_H
#include "foundation/PxVec3.h"
#include "PxConstraintDesc.h"
#include "PxgSolverConstraintDesc.h"
#include "DySolverConstraintTypes.h"
#include "CmSpatialVector.h"
namespace physx
{
// Per-joint header preceding its 1D constraint rows in the PGS solver's constraint stream.
// Layout is asserted to 48 bytes below — do not reorder members.
struct PxgSolverConstraint1DHeader
{
float4 body0WorldOffset_linBreakImpulse;	// xyz: body0 world-space offset, w: linear break impulse
//Strict ordering required - invInertiaScale0->invMassScale0->invInertiaScale1->invMassScale1. Do not change!
PxReal invInertiaScale0;
PxReal invMassScale0;
PxReal invInertiaScale1;
PxReal invMassScale1;
PxU32 rowCounts; // numbers of rows each 1D constraints
PxU32 breakable; // indicate whether the constraint are breakable or not
PxReal angBreakImpulse;		// angular break impulse threshold
PxU32 writeBackOffset;		// offset into the writeback buffer for solver outputs
};
PX_COMPILE_TIME_ASSERT(sizeof(PxgSolverConstraint1DHeader) == 48);
PX_ALIGN_PREFIX(16)
// One 1D constraint row (PGS): Jacobian axes for both bodies, the per-body delta-velocity
// responses, and impulse limits/multipliers. Trailing comments are member size / running
// offset in bytes; asserted to 112 bytes below.
struct PxgSolverConstraint1DCon
{
PxVec3 ang0; //12 12
PxVec3 lin0; //12 24
PxVec3 ang1; //12 36
PxVec3 lin1; //12 48
Cm::UnAlignedSpatialVector deltaVA; //24 72
Cm::UnAlignedSpatialVector deltaVB; //24 96
PxReal minImpulse; //4 100
PxReal maxImpulse; //4 104
PxReal velMultiplier; //4 108
PxReal impulseMultiplier; //4 112
} PX_ALIGN_SUFFIX(16);
PX_COMPILE_TIME_ASSERT(sizeof(PxgSolverConstraint1DCon) == 112);
// Mutable companion to PxgSolverConstraint1DCon: per-row bias constants, the
// accumulated applied force, and row flags.
struct PxgSolverConstraint1DMod
{
PxVec3 ang0Writeback; //!< unscaled angular velocity projection (body 0)
PxReal constant; //!< constant
PxReal unbiasedConstant; //!< unbiased constant
PxReal appliedForce; //!< applied force to correct velocity+bias
PxU32 flags;	//!< row flags — presumably Px1DConstraintFlag values; confirm at fill site
};
// Per-joint header for the TGS solver's 1D constraint rows (TGS counterpart of
// PxgSolverConstraint1DHeader, with world-space anchor offsets for positional bias).
struct PxgTGSSolverConstraint1DHeader
{
PxU16 rowCounts; // numbers of rows each 1D constraints
PxU16 breakable; // indicate whether the constraint are breakable or not
PxReal linBreakImpulse;		// linear break impulse threshold
PxReal angBreakImpulse;		// angular break impulse threshold
PxU32 writeBackOffset;		// offset into the writeback buffer for solver outputs
PxVec4 raWorld;				// world-space anchor offset for body A
PxVec4 rbWorld;				// world-space anchor offset for body B
//Strict ordering required - invInertiaScale0->invMassScale0->invInertiaScale1->invMassScale1. Do not change!
PxReal invInertiaScale0;
PxReal invMassScale0;
PxReal invInertiaScale1;
PxReal invMassScale1;
//There is no orthogonalization with articulation constraints, so we do not need to
//add anything to reflect that in this code!
};
PX_ALIGN_PREFIX(16)
// One 1D constraint row (TGS): same Jacobian/response data as the PGS row plus
// positional-error and velocity-target terms used by TGS substepping. Trailing comments
// are member size / running offset in bytes; asserted to 128 bytes below.
struct PxgTGSSolverConstraint1DCon
{
PxVec3 ang0; //12 12
PxVec3 lin0; //12 24
PxVec3 ang1; //12 36
PxVec3 lin1; //12 48
Cm::UnAlignedSpatialVector deltaVA; //24 72
Cm::UnAlignedSpatialVector deltaVB; //24 96
PxReal minImpulse; //4 100
PxReal maxImpulse; //4 104
PxReal velMultiplier; //4 108
PxReal impulseMultiplier; //4 112
PxReal error; //4 116
PxReal velTarget; //4 120
PxReal recipResponse; //4 124
PxReal angularErrorScale; //4 128
} PX_ALIGN_SUFFIX(16);
PX_COMPILE_TIME_ASSERT(sizeof(PxgTGSSolverConstraint1DCon) == 128);
// Mutable companion to PxgTGSSolverConstraint1DCon: accumulated force, bias clamp/scale
// and row flags.
struct PxgTGSSolverConstraint1DMod
{
PxReal appliedForce; //!< applied force to correct velocity+bias
PxReal maxBias;		//!< upper clamp on the bias term
PxReal biasScale;	//!< scale applied to positional error to form the bias
PxU32 flags;		//!< row flags — presumably Px1DConstraintFlag values; confirm at fill site
};
// Convenience bundle of pointers into the PGS constraint stream for one joint
// (header + its constant and mutable row arrays). Non-owning.
struct PxgJointParams
{
PxgSolverConstraint1DHeader* jointHeader;
PxgSolverConstraint1DCon* jointCon;
PxgSolverConstraint1DMod* jointMod;
};
// TGS counterpart of PxgJointParams: non-owning pointers into the TGS constraint stream.
struct PxgTGSJointParams
{
PxgTGSSolverConstraint1DHeader* jointHeader;
PxgTGSSolverConstraint1DCon* jointCon;
PxgTGSSolverConstraint1DMod* jointMod;
};
}
#endif

View File

@@ -0,0 +1,202 @@
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
#ifndef PXG_SOLVER_CONSTRAINT_BLOCK_1D_H
#define PXG_SOLVER_CONSTRAINT_BLOCK_1D_H
#include "foundation/PxVec3.h"
#include "PxConstraintDesc.h"
#include "DySolverConstraintTypes.h"
#include "PxgSolverConstraintDesc.h"
#include "vector_types.h"
#include "vector_functions.h"
namespace physx
{
#if PX_VC
#pragma warning(push)
#pragma warning(disable : 4324)
#endif
// Blocked (structure-of-arrays, 32-wide) header for PGS 1D joint constraints:
// each member is an array with one slot per constraint in the 32-constraint
// batch, 128-byte aligned.
struct PxgBlockSolverConstraint1DHeader
{
    PX_ALIGN(128, PxU16 rowCounts[32]); // numbers of rows each 1D constraints
    PX_ALIGN(128, PxU16 breakable[32]); // indicate whether the constraint are breakable or not
    PX_ALIGN(128, PxReal angBreakImpulse[32]);
    PX_ALIGN(128, float4 body0WorldOffset_linBreakImpulse[32]); // xyz = body0 world offset, w = linear break impulse
    PX_ALIGN(128, PxReal invMass0D0[32]);
    PX_ALIGN(128, PxReal invMass1D1[32]);
    PX_ALIGN(128, PxReal invInertiaScale0[32]);
    PX_ALIGN(128, PxReal invInertiaScale1[32]);
    PX_ALIGN(128, PxU32 writeBackOffset[32]); // per-constraint offset into the write-back stream - TODO confirm
    PX_ALIGN(128, PxReal cfm[32]);            // constraint-force-mixing term - presumably; verify against solver kernels
};
// Blocked (SoA, 32-wide) per-row PGS 1D constraint data. Each float4 packs a
// Jacobian vector (xyz) with a scalar term (w) as spelled out in the member
// names; the trailing comments give running/in-member byte sizes.
struct PxgBlockSolverConstraint1DCon
{
public:
    // To use different mass for mass-splitting every sub-timestep (or iteration),
    // unitResponse, recipResponse, velMultiplier, etc. are computed every sub-timestep (or iteration).
    // To compute them at every sub-timestep (or iteration), resp0, resp1, and other relevant data are stored additionally.
    PX_ALIGN(256, float4 lin0XYZ_minImpulse[32]); //512   512  //!< linear velocity projection (body 0) and min impulse term
    PX_ALIGN(256, float4 lin1XYZ_maxImpulse[32]); //1024  512  //!< linear velocity projection (body 1) and max impulse term
    PX_ALIGN(256, float4 ang0XYZ_resp0[32]);      //1536  512  //!< angular velocity projection (body 0) and resp0
    PX_ALIGN(256, float4 ang1XYZ_resp1[32]);      //2048  512  //!< angular velocity projection (body 1) and resp1
    PX_ALIGN(256, PxReal initJointSpeed[32]);     // initial joint-space speed - TODO confirm definition against prep code
};
// Blocked (SoA, 32-wide) mutable per-row PGS 1D constraint state, companion to
// PxgBlockSolverConstraint1DCon (both are initialized together by init() below).
struct PxgBlockSolverConstraint1DMod
{
public:
    PX_ALIGN(128, PxVec3 ang0Writeback[32]); //!< unscaled angular velocity projection (body 0)
    PX_ALIGN(128, PxReal appliedForce[32]);  //!< applied force to correct velocity+bias
    PX_ALIGN(128, PxU32 flags[32]);
    PX_ALIGN(128, PxReal residual[32]);      // solver residual accumulator - presumably for error reporting; verify
    // coeff0, coeff1: coefficients used to compute constant, unbiasedConstant, velMultiplier, and impulseMultiplier.
    // See also "queryReduced1dConstraintSolverConstantsPGS"
    PX_ALIGN(128, PxReal coeff0[32]);
    PX_ALIGN(128, PxReal coeff1[32]);
};
#if PX_VC
#pragma warning(pop)
#endif
#if PX_VC
#pragma warning(push)
#pragma warning(disable : 4324)
#endif
// Blocked (SoA, 32-wide) header for TGS 1D joint constraints. Unlike the PGS
// block header, this one carries orthogonalization data (three angular ortho
// axes per constraint, per the [3][32] arrays below).
struct PxgTGSBlockSolverConstraint1DHeader
{
    PX_ALIGN(128, uchar4 rowCounts_breakable_orthoAxisCount[32]); // packed per-constraint: row count, breakable flag, ortho-axis count (4th byte unused? - TODO confirm)
    PX_ALIGN(128, float4 rAWorld_invMass0D0[32]); // xyz = rA in world space, w = invMass0 * dominance0 - presumably; verify
    PX_ALIGN(128, float4 rBWorld_invMass1D1[32]); // xyz = rB in world space, w = invMass1 * dominance1 - presumably; verify
    PX_ALIGN(128, PxReal invInertiaScale0[32]);
    PX_ALIGN(128, PxReal invInertiaScale1[32]);
    PX_ALIGN(128, PxU32 writeBackOffset[32]);
    //Orthogonalization data
    PX_ALIGN(128, float4 angOrthoAxis0_recipResponseW[3][32]); // up to 3 ortho axes (body 0) with reciprocal response in w
    PX_ALIGN(128, float4 angOrthoAxis1_ErrorW[3][32]);         // up to 3 ortho axes (body 1) with error term in w
    PX_ALIGN(128, PxReal linBreakImpulse[32]);
    PX_ALIGN(128, PxReal angBreakImpulse[32]);
    PX_ALIGN(128, PxReal cfm[32]);
};
// Blocked (SoA, 32-wide) per-row TGS 1D constraint data. The w components of
// the four float4 arrays are overloaded: for rigid-body joints they hold
// initBias/biasScale/velMultiplier/velTarget, for articulations they hold the
// coefficients described in the comment below.
struct PxgTGSBlockSolverConstraint1DCon
{
public:
    // For rigid body joints, coef0, coef1, coef2, and coef3 store initBias, biasScale, velMultiplier, and velTarget,
    // respectively.
    // For articulation, the coefficients used in "compute1dConstraintSolverConstantsTGS" and
    // "queryReduced1dConstraintSolverConstantsTGS" are stored in the last (w) component.
    PX_ALIGN(128, float4 lin0XYZ_initBiasOrCoeff0[32]);      //512   512  //!< linear velocity projection (body 0) and an additional coef
    PX_ALIGN(128, float4 lin1XYZ_biasScaleOrCoeff1[32]);     //1024  512  //!< linear velocity projection (body 1) and an additional coef
    PX_ALIGN(128, float4 ang0XYZ_velMultiplierOrCoeff2[32]); //1536  512  //!< angular velocity projection (body 0) and an additional coef
    PX_ALIGN(128, float4 ang1XYZ_velTargetOrCoeff3[32]);     //2048  512  //!< angular velocity projection (body 1) and an additional coef
    // resp0, resp1, and other relevant data are stored additionally.
    PX_ALIGN(128, PxReal resp0[32]);
    PX_ALIGN(128, PxReal resp1[32]);
    PX_ALIGN(128, PxReal geometricError[32]);
    PX_ALIGN(128, PxReal minImpulse[32]);
    PX_ALIGN(128, PxReal maxImpulse[32]);
    PX_ALIGN(128, PxReal maxBias[32]);
    PX_ALIGN(128, PxReal angularErrorScale[32]);
    PX_ALIGN(128, PxU32 flags[32]);
    PX_ALIGN(128, PxReal appliedForce[32]);
    PX_ALIGN(128, PxReal residual[32]);
};
#if PX_VC
#pragma warning(pop)
#endif
// Initializes lane 'index' of a blocked PGS 1D constraint (Con + Mod) from the
// given Jacobian vectors and impulse bounds. Each Jacobian is packed into the
// xyz of a float4 slot together with its scalar partner in w (min/max impulse
// for the linear slots; the angular w slots - resp0/resp1 - are zeroed here and
// presumably filled in by a later prep pass; verify against the prep kernels).
// initJointSpeed, coeff0/coeff1, flags, appliedForce and residual likewise
// start at zero.
PX_CUDA_CALLABLE PX_FORCE_INLINE void init(PxgBlockSolverConstraint1DCon& ccon, PxgBlockSolverConstraint1DMod& cmod,
                                           const PxVec3& _linear0, const PxVec3& _linear1,
                                           const PxVec3& _angular0, const PxVec3& _angular1,
                                           const PxReal _minImpulse, const PxReal _maxImpulse,
                                           const PxU32 index)
{
    // Only the linear Jacobians are validated here (matches original behavior).
    PX_ASSERT(_linear0.isFinite());
    PX_ASSERT(_linear1.isFinite());

    ccon.lin0XYZ_minImpulse[index] = make_float4(_linear0.x, _linear0.y, _linear0.z, _minImpulse);
    ccon.lin1XYZ_maxImpulse[index] = make_float4(_linear1.x, _linear1.y, _linear1.z, _maxImpulse);
    ccon.ang0XYZ_resp0[index] = make_float4(_angular0.x, _angular0.y, _angular0.z, 0.f);
    ccon.ang1XYZ_resp1[index] = make_float4(_angular1.x, _angular1.y, _angular1.z, 0.f);
    ccon.initJointSpeed[index] = 0.f;

    cmod.coeff0[index] = 0.f;
    cmod.coeff1[index] = 0.f;
    cmod.flags[index] = 0;
    cmod.appliedForce[index] = 0.f;
    cmod.residual[index] = 0.f;
}
// Pointer bundle for one blocked (PGS) joint-constraint batch plus the timestep
// data needed by the prep code.
struct PxgJointBlockParams
{
    PxgBlockSolverConstraint1DHeader* jointHeader;
    PxgBlockSolverConstraint1DCon* jointCon;
    PxgBlockSolverConstraint1DMod* jointMod;
    PxReal dt;    // timestep
    PxReal invDt; // 1/dt
};
// TGS counterpart of PxgJointBlockParams. Note there is no separate Mod stream
// for TGS blocks, and the TGS solver additionally needs the total-step times,
// length scale and bias coefficient.
struct PxgTGSJointBlockParams
{
    PxgTGSBlockSolverConstraint1DHeader* jointHeader;
    PxgTGSBlockSolverConstraint1DCon* jointCon;
    PxReal dt;         // sub-timestep
    PxReal totalDt;    // full timestep across all sub-steps
    PxReal invDt;      // 1/dt
    PxReal invTotalDt; // 1/totalDt
    PxReal lengthScale;
    PxReal biasCoefficient;
};
}
#endif

View File

@@ -0,0 +1,217 @@
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
#ifndef PXG_SOLVER_CONSTRAINT_DESC_H
#define PXG_SOLVER_CONSTRAINT_DESC_H
#include "foundation/PxSimpleTypes.h"
#include "foundation/PxPreprocessor.h"
#include <vector_types.h>
#include "PxNodeIndex.h"
namespace physx
{
struct PxgSolverBody;
#if PX_VC
#pragma warning(push)
#pragma warning(disable : 4324)
#endif
// Blocked (SoA, 32-wide) raw 1D-constraint rows as produced by constraint
// pre-prep: Jacobians in xyz, scalar terms (error/target/impulse limits) in w.
struct PxgBlockConstraint1DVelocities
{
    PX_ALIGN(256, float4 linear0XYZ_geometricErrorW[32]);  //!< linear component of velocity jacobian in world space, geometric error of the constraint along this axis
    PX_ALIGN(256, float4 angular0XYZ_velocityTargetW[32]); //!< angular component of velocity jacobian in world space, velocity target for the constraint along this axis
    PX_ALIGN(256, float4 linear1XYZ_minImpulseW[32]);      //!< linear component of velocity jacobian in world space, minimum impulse the solver may apply to enforce this constraint
    PX_ALIGN(256, float4 angular1XYZ_maxImpulseW[32]);     //!< angular component of velocity jacobian in world space, maximum impulse the solver may apply to enforce this constraint
};
// Blocked (SoA, 32-wide) 1D-constraint parameters. 'mods' is a union: the
// spring and bounce interpretations alias the same storage, and which one is
// valid is presumably selected by bits in 'flags' - verify against the prep code.
struct PxgBlockConstraint1DParameters
{
    union
    {
        struct SpringModifiers
        {
            PX_ALIGN(128, PxReal stiffness[32]); //!< spring parameter, for spring constraints
            PX_ALIGN(128, PxReal damping[32]);   //!< damping parameter, for spring constraints
        } spring;
        struct RestitutionModifiers
        {
            PX_ALIGN(128, PxReal restitution[32]);       //!< restitution parameter for determining additional "bounce"
            PX_ALIGN(128, PxReal velocityThreshold[32]); //!< minimum impact velocity for bounce
        } bounce;
    } mods;
    PX_ALIGN(128, PxU32 flags[32]);     //!< a set of Px1DConstraintFlags
    PX_ALIGN(128, PxU32 solveHint[32]); //!< constraint optimization hint, should be an element of PxConstraintSolveHint
};
// Non-blocked (single-constraint) variant of PxgBlockConstraint1DVelocities:
// one raw 1D-constraint row with Jacobians in xyz and scalar terms in w.
struct PxgConstraint1DVelocities
{
    float4 linear0XYZ_geometricErrorW;  //!< linear component of velocity jacobian in world space, geometric error of the constraint along this axis
    float4 angular0XYZ_velocityTargetW; //!< angular component of velocity jacobian in world space, velocity target for the constraint along this axis
    float4 linear1XYZ_minImpulseW;      //!< linear component of velocity jacobian in world space, minimum impulse the solver may apply to enforce this constraint
    float4 angular1XYZ_maxImpulseW;     //!< angular component of velocity jacobian in world space, maximum impulse the solver may apply to enforce this constraint
};
// Non-blocked variant of PxgBlockConstraint1DParameters. As there, 'mods' is a
// union whose active interpretation (spring vs. bounce) is presumably selected
// via 'flags'. Note flags/solveHint are PxU16 here (vs. PxU32 in the blocked
// form) with explicit padding to keep the struct 16-byte-sized.
struct PxgConstraint1DParameters
{
    union
    {
        struct SpringModifiers
        {
            PxReal stiffness; //!< spring parameter, for spring constraints
            PxReal damping;   //!< damping parameter, for spring constraints
        } spring;
        struct RestitutionModifiers
        {
            PxReal restitution;       //!< restitution parameter for determining additional "bounce"
            PxReal velocityThreshold; //!< minimum impact velocity for bounce
        } bounce;
    } mods;
    PxU16 flags;     //!< a set of Px1DConstraintFlags
    PxU16 solveHint; //!< constraint optimization hint, should be an element of PxConstraintSolveHint
    PxU32 pad;       // explicit padding
};
// Descriptor for one solver constraint: a pointer to its prepped data plus the
// two body indices and the constraint type. Size is pinned to 16 bytes on
// 32-bit platforms by the assert below.
struct PxgSolverConstraintDesc
{
    // Discriminator for what 'constraint' points at / how to solve it.
    enum PxgConstraintType
    {
        eCONSTRAINT_1D,
        eCONTACT,
        eARTICULATION_CONSTRAINT_1D,
        eARTICULATION_CONTACT,
    };

    PxU8* constraint; //8 - raw pointer to the prepped constraint data
    PxU32 bodyAIndex;
    PxU32 bodyBIndex;
    PxU16 constraintType; // one of PxgConstraintType
    PxU16 patchIndex;     // contact patch index - presumably only meaningful for contact types; verify
    //PxU16 pad;
};

#if !PX_P64_FAMILY
PX_COMPILE_TIME_ASSERT(sizeof(PxgSolverConstraintDesc) == 16);
#endif
// Inverse mass/inertia multipliers for the two bodies of a constraint.
// The CPU build uses four named PxReal members while the CUDA build views the
// same 16 bytes as a single float4 (lin0 -> x, ang0 -> y, lin1 -> z, ang1 -> w);
// the 16-byte alignment keeps the two views layout-compatible.
PX_ALIGN_PREFIX(16)
struct PxgConstraintInvMassScale
{
#if !PX_CUDA_COMPILER
    PxReal linear0;  //!< multiplier for inverse mass of body0
    PxReal angular0; //!< multiplier for inverse MoI of body0
    PxReal linear1;  //!< multiplier for inverse mass of body1
    PxReal angular1; //!< multiplier for inverse MoI of body1
#else
    float4 lin0X_ang0Y_lin1Z_ang1W;
#endif
} PX_ALIGN_SUFFIX(16);
struct PxgBlockContactData;
struct PxgBlockContactPoint;
namespace Sc
{
class ShapeInteraction;
}
// Header describing one batch of up to 32 constraints of the same type
// (non-blocked form; see PxgBlockConstraintBatch for the SoA variant).
struct PxgConstraintBatchHeader
{
    PxU16 mDescStride;          //number between 1 to 32
    PxU16 constraintType;       //constraint type (joint or contact)
    PxU32 mConstraintBatchIndex; //constraint batch index (the index for the specific joint/contact batch)
    PxU32 mStartPartitionIndex; //start partition index (the start index for the set of partition edges representing this batch)
    PxU32 mask;                 //Only used by the articulation internal constraint solver
};
// Full batch record for a blocked (32-wide) constraint batch: scalar batch
// metadata followed by per-lane (SoA) body/node indices and bookkeeping.
struct PxgBlockConstraintBatch
{
    PxU16 mDescStride;          //number between 1 to 32
    PxU16 constraintType;
    PxU32 blockContactIndex;    // index into the block contact stream - TODO confirm
    PxU32 mConstraintBatchIndex;
    PxU32 startConstraintIndex;
    PxU32 startFrictionIndex;
    PxU32 mStartPartitionIndex;
    PxU32 mArticulationResponseIndex; //Only required for articulation constraints!
    PxU32 mask;                       // see PxgConstraintBatchHeader::mask

    // Per-lane data, one slot per constraint in the batch.
    PX_ALIGN(128, PxNodeIndex bodyANodeIndex[32]);
    PX_ALIGN(128, PxNodeIndex bodyBNodeIndex[32]);
    PX_ALIGN(128, PxU32 bodyAIndex[32]);
    PX_ALIGN(128, PxU32 bodyBIndex[32]);
    PX_ALIGN(128, PxU32 remappedBodyAIndex[32]); // remapped solver body index - presumably after partitioning; verify
    PX_ALIGN(128, PxU32 remappedBodyBIndex[32]);
    PX_ALIGN(128, PxU32 slabId[32]);
    PX_ALIGN(128, Sc::ShapeInteraction* shapeInteraction[32]); // used for force-threshold reporting
};
// Blocked (SoA, 32-wide) per-contact-manager work-unit data consumed by the
// contact constraint prep.
struct PxgBlockWorkUnit
{
    PX_ALIGN(128, PxU32 mWriteback[32]);             // write-back index/offset - TODO confirm
    PX_ALIGN(128, float mRestDistance[32]);
    PX_ALIGN(128, PxU32 mEdgeIndex[32]);
    PX_ALIGN(128, PxU32 mFlags[32]);
    PX_ALIGN(128, PxU32 mPatchIndex[32]);
    PX_ALIGN(128, PxU32 mFrictionPatchIndex[32]);
    PX_ALIGN(128, float2 mTorsionalFrictionData[32]); // two torsional-friction scalars - exact meaning not visible here; verify
};
#if PX_VC
#pragma warning(pop)
#endif
//This is used in contact pre-prep (constraintContactBlockPrePrepLaunch) and joint prep code (setupSolverConstraintBlockGPU) on the GPU.
// Per-contact-manager constants: its island edge index and where its results
// go in the constraint write-back stream.
struct PxgSolverConstraintManagerConstants
{
    PxU32 mEdgeIndex;
    PxU32 mConstraintWriteBackIndex;
};
}
#endif

View File

@@ -0,0 +1,49 @@
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
#ifndef PXG_SOLVER_CONTEXT_H
#define PXG_SOLVER_CONTEXT_H
namespace physx
{
struct PxgThresholdStreamElement;
struct PxgSolverBodyData;
// Minimal per-pass context for the GPU solver.
struct PxgSolverContext
{
    // Whether friction rows should be solved in this pass.
    bool doFriction;

    // Default-construct with friction disabled. The previous default
    // constructor left doFriction uninitialized, so any caller that forgot to
    // assign it would read an indeterminate value; zero-initializing makes the
    // default state well-defined without changing the struct's layout or API.
    PX_CUDA_CALLABLE PxgSolverContext() : doFriction(false)
    {
    }
};
}
#endif

View File

@@ -0,0 +1,485 @@
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
#ifndef PXG_SOLVER_CORE_H
#define PXG_SOLVER_CORE_H
#include "foundation/PxPinnedArray.h"
#include "foundation/PxUserAllocated.h"
#include "PxgConstraint.h"
#include "PxgSolverCoreDesc.h"
#include "PxvNphaseImplementationContext.h"
#include "PxgCudaBuffer.h"
#include "PxScene.h"
namespace physx
{
namespace Dy
{
struct ConstraintWriteback;
}
// Aggregated counts and index-array pointers produced by constraint pre-prep,
// covering rigid and articulation joints/contacts (dynamic, static and
// articulation-self variants). Passed into the GPU prep stage (see
// PxgSolverCore::gpuMemDMAUp).
struct PxgConstraintPrePrepData
{
public:
    // Joint counts, split by where pre-prep ran (GPU vs. CPU+GPU total).
    PxU32 nbGpuRigidJoints;  //gpu preprep joints
    PxU32 nbTotalRigidJoints; //cpu + gpu preprep joints
    PxU32 nbGpuArtiJoints;   //gpu preprep joints
    PxU32 nbTotalArtiJoints; //cpu + gpu preprep joint

    // Batch counts per constraint category.
    PxU32 numContactBatches;
    PxU32 num1dConstraintBatches;
    PxU32 numStaticContactBatches;
    PxU32 numStatic1dConstraintBatches;
    PxU32 numArtiContactsBatches;
    PxU32 numArti1dConstraintBatches;
    PxU32 numArtiStaticContactsBatches;
    PxU32 numArtiStatic1dConstraintBatches;
    PxU32 numArtiSelfContactsBatches;
    PxU32 numArtiSelf1dConstraintBatches;
    PxU32 artiStaticConstraintBatchOffset;
    PxU32 artiStaticContactBatchOffset;

    // Unique-index arrays per category (ownership/lifetime managed by the
    // caller - not visible from this struct).
    PxU32* constraintUniqueIndices;
    PxU32* contactUniqueIndices;
    PxU32* constraintStaticUniqueIndices;
    PxU32* contactStaticUniqueIndices;
    PxU32* artiConstraintUniqueindices;
    PxU32* artiContactUniqueIndices;
    PxU32* artiStaticConstraintUniqueIndices;
    PxU32* artiStaticContactUniqueIndices;
    PxU32* artiStaticConstraintStartIndex;
    PxU32* artiStaticConstraintCount;
    PxU32* artiStaticContactStartIndex;
    PxU32* artiStaticContactCount;
    PxU32* rigidStaticConstraintStartIndex;
    PxU32* rigidStaticConstraintCount;

    //mapped memory
    PxU32* constraint1DBatchIndices;
    PxU32* constraintContactBatchIndices;
    PxU32* artiConstraintContactBatchIndices;
    PxU32* artiConstraint1dBatchindices;
};
// Per-step simulation constants forwarded to the GPU solver descriptors
// (see PxgSolverCore::constructSolverSharedDescCommon / gpuMemDMAUp).
struct PxgConstantData
{
public:
    PxReal dt;                      // timestep
    PxReal invDtF32;                // 1/dt
    PxReal bounceThresholdF32;      // minimum impact speed for restitution - presumably; verify against scene settings
    PxReal frictionOffsetThreshold;
    PxReal correlationDistance;
    PxReal ccdMaxSeparation;
    PxReal biasCoefficient;
    PxVec3 gravity;
};
// Per-island partitioning summary handed to the GPU solver: per-partition
// constraint counts plus totals for every constraint category.
struct PxgPartitionData
{
public:
    const PxU32* constraintsPerPartition; //rigid body contact and 1d constraint
    PxU32 numConstraintsPerPartition;     // length of constraintsPerPartition
    const PxU32* artiConstraintsPerPartition; // articulation contact and 1d constraint
    PxU32 numArtiConstraintsPerPartition;     // length of artiConstraintsPerPartition

    PxU32 numTotalConstraints;
    PxU32 numTotalContacts;
    PxU32 numTotalStaticConstraints;
    PxU32 numTotalStaticContacts;
    PxU32 numTotalArtiContacts;          //dynamic contacts
    PxU32 numTotalArtiConstraints;       //external constraints
    PxU32 numTotalArtiStaticContacts;    //static contacts
    PxU32 numTotalArtiStaticConstraints; //static constraints
    PxU32 numTotalArtiSelfContacts;      //self contacts
    PxU32 numTotalArtiSelfConstraints;   //self constraints
    PxU32 artiStaticContactBatchOffset;
    PxU32 artiStaticConstraintBatchOffset;
};
class PxgPinnedHostLinearMemoryAllocator;
// Owns the device buffers used by the GPU radix sort (double-buffered
// keys/ranks plus the per-digit count buffer). Method bodies live in the
// corresponding .cpp/.cu file.
class PxgRadixSortBuffers
{
public:
    PxgRadixSortBuffers(PxgHeapMemoryAllocatorManager* heapMemoryManager);

    // Fills 'rsDesc' from the buffers below - presumably with their device
    // pointers; confirm against the implementation.
    void constructRadixSortDesc(PxgRadixSortDesc* rsDesc) const;

    // Sizes the buffers for 'totalContactBatches' entries - TODO confirm
    // the exact sizing policy against the implementation.
    void allocate(PxU32 totalContactBatches);

    PxgCudaBuffer mInputKeys;
    PxgCudaBuffer mInputRanks;
    PxgCudaBuffer mOutputKeys;
    PxgCudaBuffer mOutputRanks;
    PxgCudaBuffer mRadixCounts;
};
class PxgSolverCore : public PxUserAllocated
{
protected:
PxgCudaKernelWranglerManager* mGpuKernelWranglerManager;
PxCudaContextManager* mCudaContextManager;
PxCudaContext* mCudaContext;
PxgGpuContext* mGpuContext;
PxgHeapMemoryAllocatorManager* mHeapMemoryManager;
//PxgSimulationController* mSimulationController;
/*PxgArticulationCore* mArticulationCore;*/
PxgSolverCoreDesc* mSolverCoreDesc;
PxgConstraintPrepareDesc* mPrepareDesc;
PxgPrePrepDesc* mPrePrepDesc;
PxgRadixSortDesc* mRsDesc;
CUdeviceptr mIslandContextPool;
CUdeviceptr mSolverCoreDescd;
CUdeviceptr mSharedDescd;
CUdeviceptr mPrepareDescd;
CUdeviceptr mPrePrepDescd;
CUdeviceptr mPartionDescd;
CUdeviceptr mRadixSortDescd[2];
PxU32 mNbStaticRigidSlabs;
PxU32 mMaxNumStaticPartitions;
PxU32 mTotalContactManagers;
PxU32 mNbPrevExceededForceElements;
PxU32 mNbArticSlabs;
PxU32 mNbConstraintSlabs; // slabs used for contacts and joints.
void allocateNodeInteractionCounts(PxU32 nbNodes);
void uploadNodeInteractionCounts(const PxU32* nodeInteractionCounts, PxU32 nbNodes);
public:
PxgSolverCore(PxgCudaKernelWranglerManager* gpuKernelWrangler, PxCudaContextManager* cudaContextManager,
PxgGpuContext* dynamicContext, PxgHeapMemoryAllocatorManager* heapMemoryManager);
virtual ~PxgSolverCore(){}
/*PX_FORCE_INLINE void setSimulationController(PxgSimulationController* simulationController) { mSimulationController = simulationController; }
PX_FORCE_INLINE PxgSimulationController* getSimulationController() { return mSimulationController; }*/
/*PX_FORCE_INLINE void setArticulationCore(PxgArticulationCore* articulationCore) { mArticulationCore = articulationCore; }
PX_FORCE_INLINE PxgArticulationCore* getArticulationCore() { return mArticulationCore; }
*/
PX_FORCE_INLINE CUdeviceptr getPrePrepDescDeviceptr() { return mPrePrepDescd; }
PX_FORCE_INLINE CUdeviceptr getPrepDescDeviceptr() { return mPrepareDescd; }
PX_FORCE_INLINE CUdeviceptr getSolverCoreDescDeviceptr() { return mSolverCoreDescd; }
PX_FORCE_INLINE CUdeviceptr getSharedDescDeviceptr() { return mSharedDescd; }
virtual PxU32 getDescriptorsAllocationSize() = 0;
virtual void allocatePinnedDescriptors(PxgPinnedHostLinearMemoryAllocator& hostAllocator) = 0;
virtual void syncSimulationController() = 0;
virtual void gpuMemDMAUpContactData(PxgPinnedHostLinearMemoryAllocator* compressedContactsHostMemoryAllocator,
PxU32 compressedContactStreamUpperPartSize,
PxU32 compressedContactStreamLowerPartSize,
PxgPinnedHostLinearMemoryAllocator* compressedPatchesHostMemoryAllocator,
PxU32 compressedPatchStreamUpperPartSize,
PxU32 compressedPatchStreamLowerPartSize,
PxU32 totalContactManagers,
const PartitionIndexData* partitionIndexData,
const PartitionNodeData* partitionNodeData,
const PxgSolverConstraintManagerConstants* constantData,
PxU32 constantDataCount,
PxU32 partitionIndexDataCount,
const PxU32* partitionConstraintBatchStartIndices,
const PxU32* partitionArticConstraintBatchStartIndices,
const PxU32* partitionJointBatchCounts,
const PxU32* partitionArtiJointBatchCounts,
PxU32 nbPartitions,
const PxU32* destroyedEdges,
PxU32 nbDestroyedEdges,
const PxU32* npIndexArray, PxU32 npIndexArraySize,
PxU32 totalNumJoints,
const PxU32* islandIds, const PxU32* nodeInteractionCounts, PxU32 nbNodes, const PxU32* islandStaticTouchCount, PxU32 nbIslands) = 0;
virtual void gpuMemDmaUpBodyData(PxPinnedArray<PxgSolverBodyData>& solverBodyDataPool,
PxPinnedArray<PxgSolverTxIData>& solverTxIDataPool,
const PxU32 numSolverBodies,
const PxU32 totalNumRigidBatches, const PxU32 totalNumArticBatches,
const PxU32 nbSlabs, const PxU32 nbStaticSlabs, const PxU32 maxNumStaticPartitions) = 0;
virtual void allocateSolverBodyBuffers(const PxU32 numSolverBodies,
PxPinnedArray<PxNodeIndex>& islandNodeIndices,
const PxU32 numActiveActiculations, const PxU32 maxArticulationLinks) = 0;
virtual void gpuMemDMAUp(PxgPinnedHostLinearMemoryAllocator& hostAllocator, const PxgConstraintPrePrepData& data,
const PxU32 numSolverBodies, PxgConstraintBatchHeader* constraintBatchHeaders,
PxgIslandContext* islandContextPool, const PxU32 numIslands, const PxgPartitionData& partitionData ,
const PxU32 numConstraintBatchHeader, const PxU32 numStaticConstraintBatchHeader,
const PxU32 numArticConstraintBatchHeader, const PxU32 numArticStaticConstraintBatchHeader,
const PxU32 numArtiSelfConstraintBatchHeader, const PxgConstantData& cData,
const PxU32 numContactBlockes, const PxU32 numFrictionBlockes,
const PxU32 numArtiContacts, const PxU32 numArtiFrictions,
const PxU32 totalCurrentEdges, const PxU32 totalPreviousEdges, const PxU32 numSlabs, const PxU32 maxNbPartitions,
const bool enableStabilization,
PxU8* cpuContactPatchStreamBase, PxU8* cpuContactStreamBase, PxU8* cpuForceStreamBase, PxsContactManagerOutputIterator& outputIterator,
const PxU32 totalActiveBodyCount, const PxU32 activeBodyStartIndex, const PxU32 numArticulations, Cm::UnAlignedSpatialVector* deferredZ,
PxU32* articulationDirty, uint4* articulationSlabMask, Sc::ShapeInteraction** shapeInteractions, PxReal* restDistances,
PxsTorsionalFrictionData* torsionalData,
PxU32* artiStaticContactIndices, const PxU32 artiContactIndSize, PxU32* artiStaticJointIndices, PxU32 artiStaticJointSize,
PxU32* artiStaticContactCounts, PxU32* artiStaticJointCounts,
PxU32* artiSelfContactIndices, const PxU32 artiSelfContactIndSize, PxU32* artiSelfJointIndices, PxU32 artiSelfJointSize,
PxU32* artiSelfContactCounts, PxU32* artiSelfJointCounts,
PxU32* rigidStaticContactIndices, const PxU32 rigidContactIndSize, PxU32* rigidStaticJointIndices, const PxU32 rigidStaticJointSize,
PxU32* rigidStaticContactCounts, PxU32* rigidSaticJointCounts, const PxReal lengthScale, bool hasForceThresholds) = 0;
virtual void gpuMemDMAbackSolverData(PxU8* forceBufferPool, PxU32 forceBufferOffset, PxU32 forceBufferUpperPartSize,
PxU32 forceBufferLowerPartSize, Dy::ThresholdStreamElement* changedElems, bool hasForceThresholds, Dy::ConstraintWriteback* constraintWriteBack,
const PxU32 writeBackSize, bool copyAllToHost, Dy::ErrorAccumulator*& contactError) = 0;
virtual void syncDmaBack(PxU32& nbChangedThresholdElements) = 0;
virtual void createStreams() = 0;
virtual void releaseStreams() = 0;
virtual void acquireContext() = 0;
virtual void releaseContext() = 0;
virtual void preIntegration(const PxU32 offset, const PxU32 nbSolverBodies, const PxReal dt, const PxVec3& gravity) = 0;
virtual void jointConstraintBlockPrePrepParallel(PxU32 nbConstraintBatches) = 0;
virtual void jointConstraintPrepareParallel(PxU32 nbJointBatches) = 0;
virtual void contactConstraintPrepareParallel(PxU32 nbContactBatches) = 0;
virtual void artiJointConstraintPrepare(PxU32 nbArtiJointBatches) = 0;
virtual void artiContactConstraintPrepare(PxU32 nbArtiContactBatches) = 0;
virtual void nonRigidConstraintPrepare(PxU32 nbParticulations) = 0;
virtual void solveContactMultiBlockParallel(PxgIslandContext* islandContexts, const PxU32 numIslands, const PxU32 maxPartitions,
PxInt32ArrayPinned& constraintsPerPartition, PxInt32ArrayPinned& artiConstraintsPerPartition, const PxVec3& gravity,
PxReal* posIterResidualSharedMem, PxU32 posIterResidualSharedMemSize, Dy::ErrorAccumulator* posIterError, PxPinnedArray<Dy::ErrorAccumulator>& artiContactPosIterError,
PxPinnedArray<Dy::ErrorAccumulator>& perArticulationInternalError) = 0;
virtual void accumulatedForceThresholdStream(PxU32 maxNodes) = 0;
virtual void integrateCoreParallel( const PxU32 offset, const PxU32 nbSolverBodies) = 0;
virtual void getDataStreamBase(void*& contactStreamBase, void*& patchStreamBase, void*& forceAndIndexStreamBase) = 0;
PX_FORCE_INLINE PxgDevicePointer<PxNodeIndex> getGpuIslandNodeIndices() { return mIslandNodeIndices2.getTypedDevicePtr(); }
PX_FORCE_INLINE void setGpuContactManagerOutputBase(PxsContactManagerOutput* gpuContactManagerOutputBase) { mGpuContactManagerOutputBase = reinterpret_cast<CUdeviceptr>(gpuContactManagerOutputBase); }
PX_FORCE_INLINE CUstream getStream() { return mStream; }
PX_FORCE_INLINE PxgDevicePointer<PxU32> getSolverBodyIndices() { return mSolverBodyIndices.getTypedDevicePtr(); }
PX_FORCE_INLINE PxgTypedCudaBuffer<PxgSolverBodyData>* getSolverBodyData() { return &mSolverBodyDataPool; }
PX_FORCE_INLINE PxgDevicePointer<PxgSolverBodySleepData> getSolverBodySleepData() { return mSolverBodySleepDataPool.getTypedDevicePtr();}
PX_FORCE_INLINE PxNodeIndex* getCpuIslandNodeIndices() { return mCpuIslandNodeIndices; }
PX_FORCE_INLINE PxgDevicePointer<PxgConstraintWriteback> getConstraintWriteBackBufferDevicePtr() const { return mConstraintWriteBackBuffer.getTypedDevicePtr(); }
void allocateFrictionPatchStream(PxI32 numContactBatches, PxI32 numArtiContactBatches);
PxgBlockFrictionIndex* allocateFrictionPatchIndexStream(PxU32 totalFrictionPatchCount);
void allocateFrictionCounts(PxU32 totalEdges);
void gpuMemDMAbackSolverBodies(float4* solverBodyPool, PxU32 nbSolverBodies,
PxPinnedArray<PxAlignedTransform>& body2WorldPool,
PxPinnedArray<PxgSolverBodySleepData>& solverBodySleepDataPool, bool enableDirectGPUAPI);
void allocateSolverBodyBuffersCommon(PxU32 numSolverBodies, PxPinnedArray<PxNodeIndex>& islandNodeIndices);
void constructConstraintPrePrepDesc(PxgPrePrepDesc& preDesc, PxU32 numBatches, PxU32 numStaticBatches, PxU32 numArticBatches, PxU32 numArticStaticBatches, PxU32 numArticSelfBatches,
const PxgPartitionData& pData, PxContact* cpuCompressedcontactsBase, PxContactPatch* cpuCompressedPatchesBase, PxReal* cpuForceBufferBase,
PxU32 nbD6RigidJoint, PxU32 nbD6ArtiJoint, PxU32 nbTotalArtiJoints,
PxsContactManagerOutputIterator& outputIterator, PxU32 maxConstraintPartitions, PxU32 totalActiveBodies, PxU32 totalActiveArticulations,
PxU32 activeBodyStartOffset, Sc::ShapeInteraction** shapeInteractions, PxReal* restDistances, PxsTorsionalFrictionData* torsionalData, PxU32 nbElementsPerBody, PxU32 numSlabs);
void constructSolverSharedDescCommon(PxgSolverSharedDescBase& desc,
const PxgConstantData& cData, Cm::UnAlignedSpatialVector* deferredZ, PxU32* articulationDirty, uint4* articulationSlabMask);
void constructSolverDesc(PxgSolverCoreDesc& scDesc, PxU32 numIslands, PxU32 numSolverBodies, PxU32 numConstraintBatchHeader, PxU32 numArticConstraints, PxU32 numSlabs, bool enableStabilization);
void gpuMemDMAUpJointData(const PxPinnedArray<PxgConstraintData>& cpuJointDataPool, const PxPinnedArray<Px1DConstraint>& cpuJointRowPool,
PxU32 nbCpuJoints, PxU32 nbGpuJoints, PxU32 totalCpuRows);
void gpuMemDMAUpArtiJointData(const PxPinnedArray<PxgConstraintData>& artiJointDataPool, const PxPinnedArray<Px1DConstraint>& artiJointRowPool,
PxU32 nbCpuArtiJoints, PxU32 nbGpuArtiJoints, PxU32 totalArtiRows);
void constraintPrePrepParallel(PxU32 nbConstraintBatches, PxU32 nbD6Joints, PxU32 numBodies);
void precomputeReferenceCount(PxgIslandContext* islandContext, PxU32 islandIndex, PxInt32ArrayPinned& constraintsPerPartition,
PxInt32ArrayPinned& artiConstraintsPerPartition, bool isTGS, PxReal minPen = 0.0f, PxReal elapsedTime = 0.0f);
void resetVelocities(bool isTGS);
// Flips the double-buffer index (0 <-> 1) used to select between the paired
// PxgCudaBufferN<2> friction streams declared further down in this class —
// presumably called once per step so that last frame's "current" friction
// data becomes this frame's "previous" data (TODO confirm against callers).
// No memory is actually freed; the buffers are reused.
PX_FORCE_INLINE void resetMemoryAllocator()
{
mCurrentIndex = 1 - mCurrentIndex;
}
PxgCudaBuffer mContactHeaderBlockStream; //Different types for PGS and TGS
PxgCudaBuffer mFrictionHeaderBlockStream; //Different types for PGS and TGS
PxgCudaBuffer mContactBlockStream; //Different types for PGS and TGS
PxgCudaBuffer mFrictionBlockStream; //Different types for PGS and TGS
PxgCudaBuffer mJointHeaderBlockStream; //Different types for PGS and TGS
PxgCudaBuffer mJointRowBlockStreamCon; //Different types for PGS and TGS
PxgTypedCudaBuffer<PxgBlockSolverConstraint1DMod> mJointRowBlockStreamMod;
PxgTypedCudaBuffer<PxgBlockContactData> mConstraintContactPrepBlockPool;
PxgTypedCudaBuffer<PxgBlockConstraint1DData> mConstraint1DPrepBlockPool;
PxgTypedCudaBuffer<PxgBlockConstraint1DVelocities> mConstraint1DPrepBlockPoolVel;
PxgTypedCudaBuffer<PxgBlockConstraint1DParameters> mConstraint1DPrepBlockPoolPar;
PxgTypedCudaBuffer<PxgConstraintData> mConstraintDataPool;
PxgTypedCudaBuffer<Px1DConstraint> mConstraintRowPool;
PxgTypedCudaBuffer<PxgConstraintData> mArtiConstraintDataPool;
PxgTypedCudaBuffer<Px1DConstraint> mArtiConstraintRowPool;
PxgTypedCudaBuffer<float4> mSolverBodyPool;
PxgTypedCudaBuffer<float4> mTempStaticBodyOutputPool;
PxgTypedCudaBuffer<PxNodeIndex> mIslandNodeIndices2;
PxgTypedCudaBuffer<PxU32> mSolverBodyIndices;
PxgTypedCudaBuffer<float4> mOutVelocityPool; //this is the output of linear and angular velocity for the solver body
PxgTypedCudaBuffer<PxAlignedTransform> mOutBody2WorldPool; //this is the output of body to world transform for the solver body
PxgTypedCudaBuffer<PxgSolverBodyData> mSolverBodyDataPool;
PxgTypedCudaBuffer<PxgSolverBodySleepData> mSolverBodySleepDataPool;
PxgTypedCudaBuffer<float4> mOutArtiVelocityPool; //velocity(linear and angular) of the link for the articulations
PxgTypedCudaBuffer<PxgSolverTxIData> mSolverTxIDataPool;
PxgTypedCudaBuffer<PxU32> mConstraintsPerPartition;
PxgTypedCudaBuffer<PxU32> mArtiConstraintsPerPartition;
PxgTypedCudaBuffer<float4> mMotionVelocityArray;
PxgTypedCudaBuffer<PxgBlockConstraintBatch> mBlockConstraintBatches;
CUdeviceptr mConstraintBatchHeaders;
CUdeviceptr mConstraintUniqueIndices;
CUdeviceptr mContactUniqueIndices;
CUdeviceptr mArtiConstraintUniqueIndices;
CUdeviceptr mArtiContactUniqueIndices;
CUdeviceptr mArtiStaticConstraintUniqueIndices;
CUdeviceptr mArtiStaticContactUniqueIndices;
PxgTypedCudaBuffer<PxU32> mArtiOrderedStaticConstraints;
PxgTypedCudaBuffer<PxU32> mArtiOrderedStaticContacts;
PxgTypedCudaBuffer<PxgSolverReferences> mSolverBodyReferences;
PxgTypedCudaBuffer<PxgBlockWorkUnit> mBlockWorkUnits;
//Body remapping information
PxgTypedCudaBuffer<PartitionIndexData> mPartitionIndexData;
PxgTypedCudaBuffer<PartitionNodeData> mPartitionNodeData;
PxgTypedCudaBuffer<PxgSolverConstraintManagerConstants> mSolverConstantData;
PxgTypedCudaBuffer<PxU32> mPartitionStartBatchIndices;
PxgTypedCudaBuffer<PxU32> mPartitionArticulationStartBatchIndices;
PxgTypedCudaBuffer<PxU32> mPartitionJointBatchCounts;
PxgTypedCudaBuffer<PxU32> mPartitionArtiJointBatchCounts;
PxgTypedCudaBuffer<PxU32> mDestroyedEdgeIndices;
PxgTypedCudaBuffer<PxU32> mNpIndexArray;
PxgTypedCudaBuffer<PxgBlockContactPoint> mGpuContactBlockBuffer;
PxgTypedCudaBuffer<PxU32> mDataBuffer;
PxgTypedCudaBuffer<PxContact> mCompressedContacts;
PxgTypedCudaBuffer<PxContactPatch> mCompressedPatches;
PxgTypedCudaBuffer<PxgConstraintWriteback> mConstraintWriteBackBuffer; //1d constraint write back buffer
PxgTypedCudaBuffer<PxReal> mForceBuffer; // contact write back buffer
PxgTypedCudaBuffer<PxFrictionPatch> mFrictionPatches;
CUdeviceptr mGpuContactManagerOutputBase;
PxgTypedCudaBuffer<PxU32> mArtiStaticContactIndices;
PxgTypedCudaBuffer<PxU32> mArtiStaticJointIndices;
PxgTypedCudaBuffer<PxU32> mArtiStaticContactCounts;
PxgTypedCudaBuffer<PxU32> mArtiStaticJointCounts;
PxgTypedCudaBuffer<PxU32> mRigidStaticContactIndices;
PxgTypedCudaBuffer<PxU32> mRigidStaticJointIndices;
PxgTypedCudaBuffer<PxU32> mRigidStaticContactCounts;
PxgTypedCudaBuffer<PxU32> mRigidStaticJointCounts;
PxgTypedCudaBuffer<PxU32> mRigidStaticContactStartIndices;
PxgTypedCudaBuffer<PxU32> mRigidStaticJointStartIndices;
PxgTypedCudaBuffer<PxU32> mTempContactUniqueIndicesBlockBuffer;
PxgTypedCudaBuffer<PxU32> mTempConstraintUniqueIndicesBlockBuffer;
PxgTypedCudaBuffer<PxU32> mTempContactHeaderBlockBuffer;
PxgTypedCudaBuffer<PxU32> mTempConstraintHeaderBlockBuffer;
PxgTypedCudaBuffer<PxU32> mArtiSelfContactIndices;
PxgTypedCudaBuffer<PxU32> mArtiSelfJointIndices;
PxgTypedCudaBuffer<PxU32> mArtiSelfContactCounts;
PxgTypedCudaBuffer<PxU32> mArtiSelfJointCounts;
PxgTypedCudaBuffer<PxU32> mNodeInteractionCounts;
PxgCudaBufferN<2> mFrictionPatchBlockStream;
PxgCudaBufferN<2> mFrictionAnchorPatchBlockStream;
PxgCudaBufferN<2> mFrictionIndexStream;
PxgCudaBufferN<2> mFrictionPatchCounts;
//Non-block versions for articulation contacts. Remove!
PxgCudaBufferN<2> mFrictionPatchStream;
PxgCudaBufferN<2> mFrictionAnchorPatchStream;
PxU32 mCurrentIndex;
CUdeviceptr mArtiStaticConstraintStartIndex;
CUdeviceptr mArtiStaticConstraintCount;
CUdeviceptr mArtiStaticContactStartIndex;
CUdeviceptr mArtiStaticContactCount;
CUstream mStream;
CUstream mStream2;
PxU32* mPinnedEvent;
CUevent mEventDmaBack;
CUevent mIntegrateEvent;
PxNodeIndex* mCpuIslandNodeIndices;
PxU32 mSolverBodyOutputVelocityOffset;
PxgRadixSortBuffers mRadixSort;
};
}
#endif

View File

@@ -0,0 +1,495 @@
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
#ifndef PXG_SOLVER_CORE_DESC_H
#define PXG_SOLVER_CORE_DESC_H
#include "PxgNarrowphaseCore.h"
#include "DyResidualAccumulator.h"
struct float4;
namespace physx
{
namespace Cm
{
struct UnAlignedSpatialVector;
}
namespace Sc
{
class ShapeInteraction;
}
struct PxgConstraintData;
struct PxgConstraintPrePrep;
struct PxgBlockConstraint1DData;
struct PxgBlockConstraint1DVelocities;
struct PxgBlockConstraint1DParameters;
struct PxgBlockContactData;
struct PxgBlockContactPoint;
struct PxgConstraint1DData;
struct PxgConstraint1DVelocities;
struct PxgConstraint1DParameters;
struct PxgSolverBodyData;
struct PxgSolverBodySleepData;
struct PxgSolverTxIData;
struct PxgIslandContext;
struct PxgBodySim;
struct PxgBodySimVelocities;
class PxgArticulation;
struct PxgSolverConstraintDesc;
struct PxgBlockWorkUnit;
struct PxgBlockConstraintBatch;
struct PxgBlockFrictionPatch;
struct PxgBlockFrictionAnchorPatch;
struct PxgBlockSolverConstraint1DHeader;
struct PxgBlockSolverConstraint1DCon;
struct PxgBlockSolverConstraint1DMod;
struct PxgTGSBlockSolverConstraint1DHeader;
struct PxgTGSBlockSolverConstraint1DCon;
struct PxgBlockSolverContactHeader;
struct PxgBlockSolverFrictionHeader;
struct PxgBlockSolverContactPoint;
struct PxgBlockSolverContactFriction;
struct PxgBlockFrictionIndex;
struct PxgTGSBlockSolverContactHeader;
struct PxgTGSBlockSolverFrictionHeader;
struct PxgTGSBlockSolverContactPoint;
struct PxgTGSBlockSolverContactFriction;
struct PxgFrictionPatch;
struct PxgFrictionAnchorPatch;
struct PxgSolverConstraint1DHeader;
struct PxgSolverConstraint1DCon;
struct PxgSolverConstraint1DMod;
struct PxgTGSSolverConstraint1DHeader;
struct PxgTGSSolverConstraint1DCon;
struct PxgTGSSolverConstraint1DMod;
struct PxgSolverContactHeader;
struct PxgSolverFrictionHeader;
struct PxgSolverContactPointExt;
struct PxgSolverContactFrictionExt;
struct PxContact;
struct PxContactPatch;
struct PxgD6JointData;
struct PxgSolverReferences;
struct PxFrictionPatch;
struct PxsContactManagerOutput;
struct PartitionIndexData;
struct PartitionNodeData;
struct PxgSolverConstraintManagerConstants;
struct PxgConstraintBatchHeader;
struct PxgConstraintWriteback;
class PxAlignedTransform;
struct Px1DConstraint;
struct PxgTGSSolverContactHeader;
struct PxgTGSSolverContactPointExt;
struct PxgTGSSolverFrictionExt;
struct PxgArticulationBlockResponse;
struct PxsTorsionalFrictionData;
namespace Dy
{
struct ThresholdStreamElement;
class ThresholdStream;
}
// Bundle of device pointers consumed by the iterative PGS solver kernels.
// Mirrors IterativeSolveDataTGS below; one of the two is selected through the
// PxgSolverSharedDesc<IterData> template. All pointers reference GPU memory
// owned elsewhere; this struct only aggregates them for upload.
struct IterativeSolveData
{
// Blocked (SoA) constraint batches processed by the solver.
PxgBlockConstraintBatch* blockConstraintBatch;
// Blocked joint (1D) constraint streams: shared header plus Con/Mod row data.
PxgBlockSolverConstraint1DHeader* blockJointConstraintHeaders;
PxgBlockSolverConstraint1DCon* blockJointConstraintRowsCon;
PxgBlockSolverConstraint1DMod* blockJointConstraintRowsMod;
// Blocked contact and friction constraint streams.
PxgBlockSolverContactHeader* blockContactHeaders;
PxgBlockSolverFrictionHeader* blockFrictionHeaders;
PxgBlockSolverContactPoint* blockContactPoints;
PxgBlockSolverContactFriction* blockFrictions;
//first numSolverBodies float4s are linear velocity, last numSolverBodies float4s are angular velocity
float4* solverBodyVelPool;
// Temporary per-body output buffer for static interactions — presumably
// accumulated before merging into solverBodyVelPool; confirm in kernels.
float4* tempStaticBodyOutputPool;
// Each bit encodes the activation of a slab (32 bits). When there are more than 32 slabs, use multiple indices.
// To query the reference count, count the number of active slabs/bits.
PxU32* solverEncodedReferenceCount;
// Non-block ("Ext") contact/friction streams — presumably used for
// articulation contacts (cf. the non-block friction streams in PxgSolverCore).
PxgSolverContactHeader* contactHeaders;
PxgSolverFrictionHeader* frictionHeaders;
PxgSolverContactPointExt* contactPoints;
PxgSolverContactFrictionExt* frictions;
// Articulation impulse-response data for the block constraints.
PxgArticulationBlockResponse* artiResponse;
};
// TGS counterpart of IterativeSolveData: the same device-pointer layout but
// using the TGS-specific constraint types where they exist.
struct IterativeSolveDataTGS
{
PxgBlockConstraintBatch* blockConstraintBatch;
PxgTGSBlockSolverConstraint1DHeader* blockJointConstraintHeaders;
PxgTGSBlockSolverConstraint1DCon* blockJointConstraintRowsCon;
// NOTE(review): non-TGS Mod type while header/con use TGS variants. No TGS
// block Mod type is forward-declared in this header, so this looks
// intentional — confirm against the TGS prep/solve kernels.
PxgBlockSolverConstraint1DMod* blockJointConstraintRowsMod;
PxgTGSBlockSolverContactHeader* blockContactHeaders;
PxgTGSBlockSolverFrictionHeader* blockFrictionHeaders;
PxgTGSBlockSolverContactPoint* blockContactPoints;
PxgTGSBlockSolverContactFriction* blockFrictions;
//first numSolverBodies float4s are linear velocity, last numSolverBodies float4s are angular velocity
float4* solverBodyVelPool;
// Temporary per-body output buffer for static interactions (named
// tempStaticBodyOutputPool in the PGS variant).
float4* tempStaticBodyOutputs;
// Each bit encodes the activation of a slab (32 bits). When there are more than 32 slabs, use multiple indices.
// To query the reference count, count the number of active slabs/bits.
PxU32* solverEncodedReferenceCount;
// Non-block ("Ext") contact/friction streams, TGS flavour.
PxgTGSSolverContactHeader* contactHeaders;
PxgSolverFrictionHeader* frictionHeaders; //Technically, not needed
PxgTGSSolverContactPointExt* contactPoints;
PxgTGSSolverFrictionExt* frictions;
// Articulation impulse-response data for the block constraints.
PxgArticulationBlockResponse* artiResponse;
};
// State shared by the PGS and TGS solver pipelines; extended by the
// PxgSolverSharedDesc<IterData> template below with the flavour-specific
// iterative-solve pointers.
struct PxgSolverSharedDescBase
{
// Double-buffered friction patches: "current" written this frame,
// "previous" read from the last frame (blocked SoA layout).
PxgBlockFrictionPatch* blockCurrentFrictionPatches;
PxgBlockFrictionPatch* blockPreviousFrictionPatches;
// Non-block friction patch variants — presumably for articulation contacts
// (cf. the non-block friction streams in PxgSolverCore).
PxgFrictionPatch* currentFrictionPatches;
PxgFrictionPatch* previousFrictionPatches;
PxgBodySim* mBodySimBufferDeviceData; //If the body is articulation, we will have a remap index to the articulation array
PxgArticulation* articulations;
// Articulation solver scratch: deferred impulse (Z) vectors, per-articulation
// dirty flags, and per-slab activation masks.
Cm::UnAlignedSpatialVector* articulationDeferredZ;
PxU32* articulationDirty;
uint4* articulationSlabMask;
PxU32 deltaOutOffset;
// Timing: dt/invDtF32 are full-frame values; stepDt/stepInvDtF32 are
// presumably the per-substep values used by TGS — confirm where populated.
float dt;
float stepDt;
float invDtF32;
float stepInvDtF32;
// Scene length scale (PxTolerancesScale), used to scale distance-based terms.
float lengthScale;
};
// This descriptor is shared by the solve and prepare kernels.
// IterData is IterativeSolveData for PGS or IterativeSolveDataTGS for TGS.
template <typename IterData>
struct PxgSolverSharedDesc : PxgSolverSharedDescBase
{
IterData iterativeData;
};
// Top-level descriptor for the GPU solver core: write-back targets, force
// threshold-stream state, and per-invocation sizes/counts. Uploaded to the
// device and read by many solver kernels.
struct PxgSolverCoreDesc
{
// Final body state written back by the solver.
float4* outSolverVelocity;
PxAlignedTransform* outBody2World;
PxgSolverBodyData* solverBodyDataPool;
PxgSolverTxIData* solverBodyTxIDataPool;
PxgSolverBodySleepData* solverBodySleepDataPool;
float4* outArtiVelocity;
PxgIslandContext* islandContextPool;
float4* motionVelocityArray; // first numSolverBodies float4s are linear velocity, last numSolverBodies float4s are angular velocity
// Per-partition constraint counts, rigid bodies and articulations.
PxU32* constraintsPerPartition;
PxU32* artiConstraintsPerPartition;
// Threshold-stream buffers used to produce contact-force threshold events.
Dy::ThresholdStreamElement* thresholdStream;
Dy::ThresholdStreamElement* tmpThresholdStream;
Dy::ThresholdStreamElement* exceededForceElements;
Dy::ThresholdStreamElement* prevExceededForceElements;
Dy::ThresholdStreamElement* forceChangeThresholdElements; //stores all pairs that will trigger force-exceeded or force-lost events
PxReal* thresholdStreamAccumulatedForce;
PxReal* thresholdStreamAccumulatedForceBetweenBlocks;
PxU32* thresholdStreamWriteIndex;
PxU32* thresholdStreamWriteIndexBetweenBlocks;
bool* thresholdStreamWriteable;
PxReal* accumulatedForceObjectPairs;
PxgConstraintWriteback* constraintWriteBack; // 1D constraint write back
PxF32* forceBuffer; // contact write back
PxFrictionPatch* frictionPatches;
// Rigid-vs-static contact/joint bookkeeping (counts and start offsets).
PxU32* mRigidStaticContactCounts;
PxU32* mRigidStaticContactStartIndices;
PxU32* mRigidStaticJointCounts;
PxU32* mRigidStaticJointStartIndices;
PxgSolverReferences* solverBodyReferences;
PxsContactManagerOutput* contactManagerOutputBase;
PxgBodySim* mBodySimBufferDeviceData;
PxgBodySimVelocities* mBodySimPrevVelocitiesBufferDeviceData;
// Sizes and counts for the current solver invocation.
PxU32 numIslands;
PxU32 numBatches;
PxU32 numArticBatches;
PxU32 numSolverBodies;
PxU32 numSlabs;
PxU32 accumulatedBodyDeltaVOffset;
PxI32 sharedThresholdStreamIndex;
bool enableStabilization;
PxU32 nbExceededThresholdElements;
PxU32 nbPrevExceededThresholdElements;
PxU32 nbForceChangeElements;
PxU32 maxLinksPerArticulation;
// Residual/error accumulation for contact constraints (see DyResidualAccumulator.h).
Dy::ErrorAccumulator contactErrorAccumulator;
};
// Descriptor for the constraint-preparation kernels: input batch indices,
// prep pools to fill, friction-correlation data from the previous frame, and
// the tuning constants (bounce threshold, friction offset, ...) the prep
// code needs.
struct PxgConstraintPrepareDesc
{
PxU32* jointConstraintBatchIndices; //indices for joint batch
PxU32* contactConstraintBatchIndices; //indices for contact batch
PxU32* artiJointConstraintBatchIndices; //indices for articulation joint batch
PxU32* artiContactConstraintBatchIndices; //indices for articulation contact batch
PxgSolverConstraintManagerConstants* solverConstantData;
// Blocked joint-prep pools (data / velocities / parameters).
PxgBlockConstraint1DData* blockJointPrepPool;
PxgBlockConstraint1DVelocities* blockJointPrepPool0;
PxgBlockConstraint1DParameters* blockJointPrepPool1;
PxgSolverBodyData* solverBodyDataPool;
PxgSolverTxIData* solverBodyTxIDataPool;
PxgBlockWorkUnit* blockWorkUnit;
// Friction correlation: current frame's indices/patches are written, the
// previous frame's are read to carry friction anchors across frames.
PxgBlockFrictionIndex* blockCurrentFrictionIndices;
PxgBlockFrictionIndex* blockPreviousFrictionIndices;
PxgBlockContactData* blockContactCurrentPrepPool;
PxgBlockContactPoint* blockContactPoints;
PxgBlockFrictionAnchorPatch* blockCurrentAnchorPatches;
PxgBlockFrictionAnchorPatch* blockPreviousAnchorPatches;
////////////////////////////////////////////////////////////////////////////
//for articulation
PxgFrictionPatch* currentFrictionPatches;
PxgFrictionPatch* previousFrictionPatches;
PxgFrictionAnchorPatch* currentAnchorPatches;
PxgFrictionAnchorPatch* previousAnchorPatches;
/* PxgConstraint1DData* jointPrepPool;
PxgConstraint1DVelocities* jointPrepPool0;
PxgConstraint1DParameters* jointPrepPool1;*/
//////////////////////////////////////////////////////////////////////////////
PxAlignedTransform* body2WorldPool;
PxsContactManagerOutput* contactManagerOutputBase;
PxU32* constraintUniqueIndices;
PxU32* artiConstraintUniqueIndices;
PxU32* artiContactUniqueIndices;
// Batch counts, split by constraint type (1D joint vs contact), by
// rigid/articulation, and by dynamic/static/self interaction.
PxU32 num1dConstraintBatches;
PxU32 numContactBatches;
PxU32 numStatic1dConstraintBatches;
PxU32 numStaticContactBatches;
PxU32 numArti1dConstraintBatches;
PxU32 numArtiStatic1dConstraintBatches;
PxU32 numArtiSelf1dConstraintBatches;
PxU32 numArtiContactBatches;
PxU32 numArtiStaticContactBatches;
PxU32 numArtiSelfContactBatches;
PxU32 totalBodyCount;
PxU32 numBatches;
PxU32 numStaticBatches;
// Scene constants used during prep (from PxgConstantData).
float bounceThresholdF32;
float frictionOffsetThreshold;
float correlationDistance;
float ccdMaxSeparation;
PxU32 totalPreviousEdges;
PxU32 totalCurrentEdges;
PxU32 articContactIndex;
PxU32 articJointIndex;
PxU32 nbElementsPerBody;
PxReal biasCoefficient;
};
// Descriptor for the constraint pre-preparation stage: raw narrowphase
// outputs, CPU-originated joint data, partition/batch layout, and the index
// arrays the pre-prep kernels populate for the later prep/solve stages.
struct PxgPrePrepDesc
{
PxgBlockConstraintBatch* blockBatches;
// Batch counts by interaction class (rigid/articulation, dynamic/static/self).
PxU32 numBatches;
PxU32 numStaticBatches;
PxU32 numArtiBatches;
PxU32 numArtiStaticBatches;
PxU32 numArtiSelfBatches;
PxU32 nbD6RigidJoints;
PxU32 nbD6ArtiJoints;
PxU32 nbTotalArtiJoints; //Only used for an assert
// Raw contact/constraint totals, same dynamic/static/self split.
PxU32 numTotalContacts;
PxU32 numTotalConstraints;
PxU32 numTotalStaticContacts;
PxU32 numTotalStaticConstraints;
PxU32 numTotalArtiContacts; //dynamic contacts
PxU32 numTotalArtiConstraints; //external constraints
PxU32 numTotalStaticArtiContacts; //static contacts
PxU32 numTotalStaticArtiConstraints; //static constraints
PxU32 numTotalSelfArtiContacts; //self contacts
PxU32 numTotalSelfArtiConstraints; //self constraints
PxU32 artiStaticConstraintBatchOffset;
PxU32 artiStaticContactBatchOffset;
PxgBlockWorkUnit* blockWorkUnit;
PxgBlockContactData* blockContactData; //GPU output data
PxgBlockContactPoint* blockContactPoints;
// GPU-resident compressed contact stream and its CPU-side base addresses
// (the CPU bases are needed to rebase pointers after DMA).
PxContact* compressedContacts;
PxContactPatch* compressedPatches;
PxU8* forceBuffer;
PxContact* cpuCompressedContactsBase;
PxContactPatch* cpuCompressedPatchesBase;
PxReal* cpuForceBufferBase;
PxgBlockConstraint1DData* blockPrepData; //GPU output data
PxgBlockConstraint1DVelocities* blockPrepVelocityData; //GPU output data
PxgBlockConstraint1DParameters* blockPrepParameterData; //GPU output data
PxgConstraintData* constraintData; //GPU output/Input data for d6 joint, GPU input for cpu joint
Px1DConstraint* constraintRows; //GPU output/Input joint row data for d6 joint. GPU input for cpu joint
PxgConstraintData* artiConstraintData; //GPU input data
Px1DConstraint* artiConstraintRows; //GPU input joint row data
const PxgD6JointData* rigidJointData; //GPU input data
const PxgConstraintPrePrep* rigidConstraintPrePrep; //GPU input data
const PxgD6JointData* artiJointData; //GPU input data
const PxgConstraintPrePrep* artiConstraintPrePrep; //GPU input data
PxsContactManagerOutput* contactManagerOutputBase;
// Shared running indices into the constraint streams.
PxU32 sharedJointRowIndex;
PxU32 sharedFrictionConstraintIndex;
PxU32 sharedContactConstraintIndex;
PxU32 sharedArticulationResponseIndex;
PxU32* solverBodyIndices;
// Partition layout produced by the constraint partitioning stage.
PartitionIndexData* mPartitionIndices;
PxU32* mPartitionstartBatchIndices;
PxU32* mPartitionArtiStartBatchIndices;
PxU32* mPartitionJointCounts;
PxU32* mPartitionArtiJointCounts;
PxU32* prevFrictionPatchCount;
PxU32* currFrictionPatchCount;
PxU32* mNpOutputIndices;
PxgSolverBodyData* mSolverBodyData;
// Per-bucket offsets into the narrowphase contact-manager output.
PxU32 mCmOutputOffsets[GPU_BUCKET_ID::eCount];
PartitionNodeData* mPartitionNodeData;
PxgSolverConstraintManagerConstants* mContactConstantData;
PxgConstraintBatchHeader* mBatchHeaders;
PxU32* mContactUniqueIndices;
PxU32* mConstraintUniqueIndices;
PxU32* mArtiConstraintUniqueIndices; //external constraints
PxU32* mArtiContactUniqueIndices; //dynamic contacts
PxgSolverReferences* mSolverBodyReferences;
PxU32 mMaxConstraintPartitions;
PxU32 mTotalSlabs;
PxU32 mTotalActiveBodies;
PxU32 mTotalActiveArticulations;
PxU32 mActiveBodyStartOffset;
PxU32 nbElementsPerBody;
// Per-contact-manager metadata forwarded from narrowphase.
Sc::ShapeInteraction** mShapeInteractions;
PxReal* mRestDistances;
PxsTorsionalFrictionData* mTorsionalFrictionData;
//Static articulation contact data
PxU32* mArtiStaticContactIndices;
PxU32* mArtiStaticConstraintIndices;
PxU32* mArtiStaticContactCounts;
PxU32* mArtiStaticConstraintCounts;
PxU32* mArtiSelfContactIndices;
PxU32* mArtiSelfConstraintIndices;
PxU32* mArtiSelfContactCounts;
PxU32* mArtiSelfConstraintCounts;
//Static rigid body contact data
PxU32* mRigidStaticContactIndices;
PxU32* mRigidStaticConstraintIndices;
PxU32* mRigidStaticContactCounts;
PxU32* mRigidStaticConstraintCounts;
PxU32* mRigidStaticContactStartIndices;
PxU32* mRigidStaticConstraintStartIndices;
// Temporary scratch for unique-index/header compaction.
PxU32* mTempContactUniqueIndices;
PxU32* mTempConstraintUniqueIndices;
PxU32* mTempContactBlockHeader;
PxU32* mTempConstraintBlockHeader;
};
}
#endif

View File

@@ -0,0 +1,59 @@
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
#ifndef PXG_SOLVER_FLAGS_H
#define PXG_SOLVER_FLAGS_H
namespace physx
{
// Per-contact solver flags; stored as a PxU8 (see the compile-time assert
// below), so the highest usable flag bit is 1 << 7.
struct PxgSolverContactFlags
{
enum Enum
{
// Contact participates in force-threshold event reporting.
eHAS_FORCE_THRESHOLDS = 1 << 0,
// This flag...
// - disables correlation of contact patches with friction patches from the previous frame
// - enables target velocities being read from the friction anchor contact points
//
// Two scenarios will raise this flag:
// - strong/sticky friction is disabled
// - contact modification sets a target velocity on contact points
ePER_POINT_FRICTION = 1 << 1,
// Friction is disabled for this contact.
eDISABLE_FRICTION = 1 << 2,
// Compliant contact treated as an acceleration-based spring.
eCOMPLIANT_ACCELERATION_SPRING = 1 << 3,
// One past the highest flag ((1 << 3) + 1); only used by the size check below.
eLAST
};
};
PX_COMPILE_TIME_ASSERT(PxgSolverContactFlags::eLAST <= ((1 << 7) + 1)); // we store these Flags as PxU8
} // namespace physx
#endif

View File

@@ -0,0 +1,129 @@
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
#ifndef PXG_SOLVER_KERNEL_INDICES_H
#define PXG_SOLVER_KERNEL_INDICES_H
namespace physx
{
#define PXG_USE_SHARED_MEMORY_PRE_PREP 0
// CUDA block dimensions (threads per block) for each solver kernel. Paired
// with PxgKernelGridDim below; some entries must match hard-coded BLOCK_SIZE
// constants in the kernel implementations (see notes in PxgKernelGridDim).
struct PxgKernelBlockDim
{
enum
{
//Constraint partition
CONSTRAINT_PRE_PARTITION = 1024,
CONSTRAINT_PARTITION = 1024,
PRE_INTEGRATION = 128,
//Constraint pre-preparation
CONSTRAINT_PREPREP_BLOCK = 128,
//Constraint preparation
CONSTRAINT_PREPARE_BLOCK_PARALLEL = 64,
ARTI_CONSTRAINT_PREPARE = 64,
//Multi-block solver code
ZERO_BODIES = 256,
SOLVE_BLOCK_PARTITION = 64,
CONCLUDE_BLOCKS = 256,
WRITEBACK_BLOCKS = 256,
WRITE_BACK_BODIES = 256,
COMPUTE_BODIES_AVERAGE_VELOCITY = 256,
//threshold stream
INITIALIZE_INPUT_AND_RANKS = 256,
RADIXSORT = 256,
REORGANIZE_THRESHOLDSTREAM = 256,
COMPUTE_ACCUMULATED_THRESHOLDSTREAM = 256,
OUTPUT_ACCUMULATED_THRESHOLDSTREAM = 256,
WRITEOUT_ACCUMULATEDFORCEPEROBJECT = 256,
COMPUTE_EXCEEDEDFORCE_THRESHOLDELEMENT_INDICE = 256,
OUTPUT_EXCEEDEDFORCE_THRESHOLDELEMENT_INDICE = 256,
SET_THRESHOLDELEMENT_MASK = 256,
COMPUTE_THRESHOLDELEMENT_MASK_INDICES = 256,
OUTPUT_THRESHOLDELEMENT_MASK_INDICES = 256,
CREATE_FORCECHANGE_THRESHOLDELEMENTS = 256,
//Integration
INTEGRATE_CORE_PARALLEL = 128,
CLEAR_FRICTION_PATCH_COUNTS = 256,
DMA_CHANGED_ELEMS = 512,
COMPUTE_STATIC_CONTACT_CONSTRAINT_COUNT = 512
};
};
// CUDA grid dimensions (number of blocks per launch) for each solver kernel,
// paired with the block dimensions in PxgKernelBlockDim above.
struct PxgKernelGridDim
{
enum
{
//Constraint partition
CONSTRAINT_PRE_PARTITION = 1,
CONSTRAINT_PARTITION = 1,
PRE_INTEGRATION = 64,
//Constraint preparation
CONSTRAINT_PREPREP_BLOCK = 128,
CONSTRAINT_PREPARE_BLOCK_PARALLEL = 256,
//Multi-block solver code
ZERO_BODIES = 64,
SOLVE_BLOCK_PARTITION = 64,
CONCLUDE_BLOCKS = 64,
WRITEBACK_BLOCKS = 64,
WRITE_BACK_BODIES = 64,
COMPUTE_BODIES_AVERAGE_VELOCITY = 128,
//threshold stream
INITIALIZE_INPUT_AND_RANKS = 64,
RADIXSORT = 32, //this must be 32 to match the BLOCK_SIZE for the radix sort kernel
REORGANIZE_THRESHOLDSTREAM = 64,
COMPUTE_ACCUMULATED_THRESHOLDSTREAM = 32,//this must be 32 to match the BLOCK_SIZE for the compute kernel
OUTPUT_ACCUMULATED_THRESHOLDSTREAM = 32,//this must be 32 to match the BLOCK_SIZE for the output kernel
WRITEOUT_ACCUMULATEDFORCEPEROBJECT = 64,
COMPUTE_EXCEEDEDFORCE_THRESHOLDELEMENT_INDICE = 32,
OUTPUT_EXCEEDEDFORCE_THRESHOLDELEMENT_INDICE = 32,
SET_THRESHOLDELEMENT_MASK = 64,
COMPUTE_THRESHOLDELEMENT_MASK_INDICES = 32,
OUTPUT_THRESHOLDELEMENT_MASK_INDICES = 32,
CREATE_FORCECHANGE_THRESHOLDELEMENTS = 64,
//Integration
INTEGRATE_CORE_PARALLEL = 64,
CLEAR_FRICTION_PATCH_COUNTS = 64,
DMA_CHANGED_ELEMS = 64,
};
};
}
#endif

View File

@@ -0,0 +1,204 @@
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
#ifndef PXG_TGS_CUDA_SOLVER_CORE_H
#define PXG_TGS_CUDA_SOLVER_CORE_H
#include "PxgSolverCore.h"
namespace physx
{
// PT: TODO: rename to just PxgTGSSolverCore ?
class PxgTGSCudaSolverCore : public PxgSolverCore
{
PX_NOCOPY(PxgTGSCudaSolverCore)
private:
//this is for articulation
PxgCudaBuffer mConstraintContactPrepPool;
PxgTypedCudaBuffer<PxgTGSSolverContactHeader> mContactHeaderStream;
PxgTypedCudaBuffer<PxgTGSSolverContactPointExt> mContactStream;
PxgTypedCudaBuffer<PxgTGSSolverFrictionExt> mFrictionStream;
// Each bit encodes the activation of a slab (32 bits). When there are more than 32 slabs, use multiple indices.
// To query the reference count, count the number of active slabs/bits.
PxgTypedCudaBuffer<PxU32> mSolverEncodedReferenceCount;
//This is the new articulation block constraint format!
//It shares the original rigid body contact/constraint format but adds in
//an additional buffer for the response vectors
PxgTypedCudaBuffer<PxgArticulationBlockResponse> mArtiConstraintBlockResponse;
PxgTypedCudaBuffer<Dy::ThresholdStreamElement> mForceThresholdStream;
PxgTypedCudaBuffer<Dy::ThresholdStreamElement> mTmpForceThresholdStream;
// Per-constraint-type batch index tables (device buffers): rigid 1D/joint,
// rigid contact, articulation contact, articulation 1D/joint batches.
PxgTypedCudaBuffer<PxU32> mConstraint1DBatchIndices;
PxgTypedCudaBuffer<PxU32> mContactBatchIndices;
PxgTypedCudaBuffer<PxU32> mArtiContactBatchIndices;
PxgTypedCudaBuffer<PxU32> mArtiConstraint1dBatchIndices;
PxgTypedCudaBuffer<PxReal> mAccumulatedForceObjectPairs; //store the accumulated force for a pair of objects
// Force-threshold reporting state. mExceededForceElements holds two buffers
// (N == 2) -- presumably current/previous frame; TODO confirm against the .cpp.
PxgCudaBufferN<2> mExceededForceElements;
PxgTypedCudaBuffer<Dy::ThresholdStreamElement> mForceChangeThresholdElements;
PxgTypedCudaBuffer<PxReal> mThresholdStreamAccumulatedForce;
PxgTypedCudaBuffer<PxReal> mBlocksThresholdStreamAccumulatedForce; // per-block partial sums feeding the accumulation
PxgTypedCudaBuffer<PxU32> mThresholdStreamWriteIndex;
PxgTypedCudaBuffer<PxU32> mBlocksThresholdStreamWriteIndex; // per-block partial write indices
PxgTypedCudaBuffer<bool> mThresholdStreamWriteable;
// Island bookkeeping mirrored on the device (uploaded in gpuMemDMAUpContactData).
PxgTypedCudaBuffer<PxU32> mIslandIds;
PxgTypedCudaBuffer<PxU32> mIslandStaticTouchCount;
// Shared description of the TGS iterative solve data.
// NOTE(review): raw pointer -- ownership/lifetime is not visible from this header.
PxgSolverSharedDesc<IterativeSolveDataTGS>* mSharedDesc;
// Multi-pass radix sort helper; implementation in the .cpp.
void radixSort(const PxU32 nbPasses);
friend class PxgArticulationCore;
public:
// Constructs the TGS CUDA solver core. Does not take ownership semantics
// visible from this header; see the .cpp for what is retained.
PxgTGSCudaSolverCore(PxgCudaKernelWranglerManager* gpuKernelWrangler, PxCudaContextManager* cudaContextManager,
PxgGpuContext* dynamicContext, PxgHeapMemoryAllocatorManager* heapMemoryManager, const PxGpuDynamicsMemoryConfig& init);
~PxgTGSCudaSolverCore();
// Fills the shared solver descriptor consumed by the TGS solve kernels.
void constructSolverSharedDesc(PxgSolverSharedDesc<IterativeSolveDataTGS>& desc, const PxgConstantData& cData,
const PxU32 numIters, const PxReal lengthScale, Cm::UnAlignedSpatialVector* deferredZ, PxU32* articulationDirty,
uint4* articulationSlabMask);
// Fills the constraint-prepare descriptor with batch counts per constraint class.
// NOTE(review): "Constrait" in the name looks like a typo for "Constraint", but it
// must match the out-of-line definition and any callers -- do not rename here alone.
void constructConstraitPrepareDesc(PxgConstraintPrepareDesc& desc, const PxU32 numDynamicConstraintBatchHeader,
const PxU32 numStaticConstraintBatchHeaders, const PxU32 numDynamic1dConstraintBatches, const PxU32 numStatic1dConstraintBatches,
const PxU32 numDynamicContactBatches, const PxU32 numStaticContactBatches,
const PxU32 numArti1dConstraintBatches, const PxU32 numArtiContactBatches,
const PxU32 numArtiStatic1dConstraintBatches, const PxU32 numArtiStaticContactBatches,
const PxU32 numArtiSelf1dConstraintBatches, const PxU32 numArtiSelfContactBatches,
const PxgConstantData& cData, PxU32 totalCurrentEdges, PxU32 totalPreviousEdges, PxU32 totalBodies);
// Fills the top-level solver-core descriptor (island/body/batch counts, slab count).
void constructSolverDesc(PxgSolverCoreDesc& desc, PxU32 numIsland, PxU32 numSolverBodies, PxU32 numConstraintBatchHeader,
PxU32 numArticConstraints, PxU32 numSlabs, bool enableStabilization);
void syncSimulationController();
// CUDA stream / context lifetime hooks (virtual; overridable by derived cores).
virtual void createStreams();
virtual void releaseStreams();
virtual void acquireContext();
virtual void releaseContext();
// DMA contact-stream data host -> device: compressed contact/patch streams
// (split into upper/lower parts), partition tables, destroyed edges, and
// island bookkeeping used later for threshold/static-touch reporting.
void gpuMemDMAUpContactData(PxgPinnedHostLinearMemoryAllocator* compressedContactsHostMemoryAllocator,
PxU32 compressedContactStreamUpperPartSize,
PxU32 compressedContactStreamLowerPartSize,
PxgPinnedHostLinearMemoryAllocator* compressedPatchesHostMemoryAllocator,
PxU32 compressedPatchStreamUpperPartSize,
PxU32 compressedPatchStreamLowerPartSize,
PxU32 totalContactManagers,
const PartitionIndexData* partitionIndexData,
const PartitionNodeData* partitionNodeData,
const PxgSolverConstraintManagerConstants* constantData,
PxU32 constantDataCount,
PxU32 partitionIndexDataCount,
const PxU32* partitionConstraintBatchStartIndices,
const PxU32* partitionArticConstraintBatchStartIndices,
const PxU32* partitionJointBatchCounts,
const PxU32* partitionArtiJointBatchCounts,
PxU32 nbPartitions,
const PxU32* destroyedEdges,
PxU32 nbDestroyedEdges,
const PxU32* npIndexArray, PxU32 npIndexArraySize,
PxU32 totalNumJoints,
const PxU32* islandIds, const PxU32* nodeInteractionCounts, PxU32 nbNodes, const PxU32* islandStaticTouchCount, PxU32 nbIslands);
// DMA solver body data (pinned host arrays) host -> device.
void gpuMemDmaUpBodyData(PxPinnedArray<PxgSolverBodyData>& solverBodyDataPool,
PxPinnedArray<PxgSolverTxIData>& solverTxIDataPool,
const PxU32 numSolverBodies,
const PxU32 totalNumRigidBatches, const PxU32 totalNumArticBatches,
const PxU32 nbSlabs, const PxU32 nbStaticSlabs, const PxU32 maxNumStaticPartitions);
// NOTE(review): "numActiveActiculations" looks like a typo for
// "...Articulations"; parameter names in a declaration are cosmetic, but keep
// them in sync with the definition when cleaning up.
void allocateSolverBodyBuffers(const PxU32 numSolverBodies,
PxPinnedArray<PxNodeIndex>& islandNodeIndices,
const PxU32 numActiveActiculations, const PxU32 maxArticulationLinks);
// Size (bytes) needed by allocatePinnedDescriptors() below.
PxU32 getDescriptorsAllocationSize();
void allocatePinnedDescriptors(PxgPinnedHostLinearMemoryAllocator& hostAllocator);
// Main per-step host -> device upload of constraint/solver data.
// NOTE(review): declaration-side typos -- "numContactBlockes"/"numFrictionBlockes"
// ("Blocks") and "rigidSaticJointCounts" ("Static"); harmless here but worth
// fixing together with the .cpp definition.
void gpuMemDMAUp(PxgPinnedHostLinearMemoryAllocator& hostAllocator, const PxgConstraintPrePrepData& data,
const PxU32 numSolverBodies, PxgConstraintBatchHeader* constraintBatchHeaders,
PxgIslandContext* islandContextPool, const PxU32 numIslands, const PxgPartitionData& partitionData,
const PxU32 numConstraintBatchHeader, const PxU32 numStaticConstraintBatchHeader,
const PxU32 numArticConstraintBatchHeader, const PxU32 numArticStaticConstraintBatchHeader,
const PxU32 numArtiSelfConstraintBatchHeader, const PxgConstantData& cData,
const PxU32 numContactBlockes, const PxU32 numFrictionBlockes,
const PxU32 numArtiContacts, const PxU32 numArtiFrictions,
const PxU32 totalCurrentEdges, const PxU32 totalPreviousEdges, const PxU32 numSlabs, const PxU32 maxNbPartitions,
const bool enableStabilization, PxU8* cpuContactPatchStreamBase, PxU8* cpuContactStreamBase, PxU8* cpuForceStreamBase, PxsContactManagerOutputIterator& outputIterator,
const PxU32 totalActiveBodyCount, const PxU32 activeBodyStartIndex, const PxU32 nbArticulations, Cm::UnAlignedSpatialVector* deferredZ,
PxU32* articulationDirty, uint4* articulationSlabMask, Sc::ShapeInteraction** shapeInteractions, PxReal* restDistances,
PxsTorsionalFrictionData* torsionalData,
PxU32* artiStaticContactIndices, const PxU32 artiContactIndSize, PxU32* artiStaticJointIndices, PxU32 artiStaticJointSize,
PxU32* artiStaticContactCounts, PxU32* artiStaticJointCounts,
PxU32* artiSelfContactIndices, const PxU32 artiSelfContactIndSize, PxU32* artiSelfJointIndices, PxU32 artiSelfJointSize,
PxU32* artiSelfContactCounts, PxU32* artiSelfJointCounts,
PxU32* rigidStaticContactIndices, const PxU32 rigidContactIndSize, PxU32* rigidStaticJointIndices, const PxU32 rigidStaticJointSize,
PxU32* rigidStaticContactCounts, PxU32* rigidSaticJointCounts, const PxReal lengthScale, bool hasForceThresholds);
// DMA solver results device -> host (forces, threshold-stream changes,
// constraint write-back, accumulated error).
void gpuMemDMAbackSolverData(PxU8* forceBufferPool, PxU32 forceBufferOffset, PxU32 forceBufferUpperPartSize,
PxU32 forceBufferLowerPartSize, Dy::ThresholdStreamElement* changedElems, bool hasForceThresholds, Dy::ConstraintWriteback* constraintWriteBack,
const PxU32 writeBackSize, bool copyAllToHost, Dy::ErrorAccumulator*& contactError);
// Blocks until the device -> host DMA completes; reports how many
// threshold-stream elements changed.
void syncDmaBack(PxU32& nbChangedThresholdElements);
void preIntegration(const PxU32 offset, const PxU32 nbSolverBodies, const PxReal dt, const PxVec3& gravity);
// Parallel constraint pre-prepare/prepare entry points, one per constraint class.
void jointConstraintBlockPrePrepParallel(PxU32 nbConstraintBatches);
void jointConstraintPrepareParallel(PxU32 nbJointBatches);
void contactConstraintPrepareParallel(PxU32 nbContactBatches);
void artiJointConstraintPrepare(PxU32 nbArtiJointBatches);
void artiContactConstraintPrepare(PxU32 nbArtiContactBatches);
//soft body/cloth/particle constraint prepare
void nonRigidConstraintPrepare(PxU32 nbArticulations);
// Multi-block parallel contact solve across islands/partitions; residual
// buffers receive per-iteration error when residual reporting is enabled.
void solveContactMultiBlockParallel(PxgIslandContext* islandContexts, const PxU32 numIslands, const PxU32 maxPartitions,
PxInt32ArrayPinned& constraintsPerPartition, PxInt32ArrayPinned& artiConstraintsPerPartition, const PxVec3& gravity,
PxReal* posIterResidualSharedMem, PxU32 posIterResidualSharedMemSize, Dy::ErrorAccumulator* posIterError, PxPinnedArray<Dy::ErrorAccumulator>& artiContactPosIterError,
PxPinnedArray<Dy::ErrorAccumulator>& perArticulationInternalError);
void writeBackBlock(PxU32 a, PxgIslandContext& context);
// Solves one island's partitions for a single (position or velocity) iteration.
void solvePartitions(PxgIslandContext* islandContexts, PxInt32ArrayPinned& constraintsPerPartition, PxInt32ArrayPinned& artiConstraintsPerPartition,
PxU32 islandIndex, bool doFriction, PxReal accumulatedDt, PxReal minPen, bool anyArticulationConstraints, bool isVelocityIteration);
void accumulatedForceThresholdStream(PxU32 maxNodes);
void integrateCoreParallel(const PxU32 offset, const PxU32 nbSolverBodies);
// Returns the base addresses of the contact/patch/force-and-index data streams.
void getDataStreamBase(void*& contactStreamBase, void*& patchStreamBase, void*& forceAndIndexStreamBase);
};
}
#endif

// ---------------------------------------------------------------------------
// NOTE: extraction artifact -- the text above ("View File" / "@@ -0,0 +1,64 @@")
// is residue from a git web UI. A second, independent header file
// (PxgTGSDynamicsContext.h) begins below.
// ---------------------------------------------------------------------------
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
#ifndef PXG_TGS_DYNAMICS_CONTEXT_H
#define PXG_TGS_DYNAMICS_CONTEXT_H
#include "PxgContext.h"
namespace physx
{
namespace Cm
{
class FlushPool;
}
class PxBaseTask;
class PxsKernelWranglerManager;
/**
\brief A class to represent a GPU dynamics context for the GPU rigid body solver.

Specialization of PxgGpuContext for the TGS solver: the only TGS-specific
behavior visible in this header is getSolverType() returning PxSolverType::eTGS.
The constructor forwards the full dynamics configuration; its body (and
destroy()) live in the .cpp.
*/
class PxgTGSDynamicsContext : public PxgGpuContext
{
	PX_NOCOPY(PxgTGSDynamicsContext)
public:
	// Wires the GPU kernel wrangler, CUDA context manager, memory config and
	// solver tuning options (partitions, stabilization, determinism, residual
	// reporting, direct-GPU API) into the context.
	PxgTGSDynamicsContext(Cm::FlushPool& flushPool, PxsKernelWranglerManager* gpuKernelWrangler, PxCudaContextManager* cudaContextManager,
		const PxGpuDynamicsMemoryConfig& config, IG::SimpleIslandManager& islandManager, PxU32 maxNumPartitions, PxU32 maxNumStaticPartitions, bool enableStabilization, bool useEnhancedDeterminism,
		PxReal maxBiasCoefficient, PxvSimStats& simStats, PxgHeapMemoryAllocatorManager* heapMemoryManager, bool externalForcesEveryTgsIterationEnabled, PxReal lengthScale, bool enableDirectGPUAPI, PxU64 contextID,
		bool isResidualReportingEnabled);
	// Tears down the context (implementation in the .cpp).
	virtual void destroy();
	// Identifies this context as the TGS solver.
	virtual PxSolverType::Enum getSolverType() const { return PxSolverType::eTGS; }
};
}
#endif