// Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions // are met: // * Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // * Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // * Neither the name of NVIDIA CORPORATION nor the names of its // contributors may be used to endorse or promote products derived // from this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved. // Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. // Copyright (c) 2001-2004 NovodeX AG. All rights reserved. #include "foundation/PxMemory.h" #include "foundation/PxMathUtils.h" #include "DyConstraintPrep.h" #include "DyCpuGpuArticulation.h" #include "PxsRigidBody.h" #include "DySolverConstraint1D.h" #include "foundation/PxSort.h" #include "DySolverConstraintDesc.h" #include "PxcConstraintBlockStream.h" #include "DyArticulationContactPrep.h" #include "foundation/PxSIMDHelpers.h" #include "DyArticulationUtils.h" #include "DyAllocator.h" namespace physx { namespace Dy { // dsequeira: // // we can choose any linear combination of equality constraints and get the same solution // Hence we can orthogonalize the constraints using the inner product given by the // inverse mass matrix, so that when we use PGS, solving a constraint row for a joint // don't disturb the solution of prior rows. // // We also eliminate the equality constraints from the hard inequality constraints - // (essentially projecting the direction corresponding to the lagrange multiplier // onto the equality constraint subspace) but 'til I've verified this generates // exactly the same KKT/complementarity conditions, status is 'experimental'. // // since for equality constraints the resulting rows have the property that applying // an impulse along one row doesn't alter the projected velocity along another row, // all equality constraints (plus one inequality constraint) can be processed in parallel // using SIMD // // Eliminating the inequality constraints from each other would require a solver change // and not give us any more parallelism, although we might get better convergence. namespace { struct MassProps { FloatV invMass0; // the inverse mass of body0 after inverse mass scale was applied FloatV invMass1; // the inverse mass of body1 after inverse mass scale was applied FloatV invInertiaScale0; FloatV invInertiaScale1; PX_FORCE_INLINE MassProps(const PxReal imass0, const PxReal imass1, const PxConstraintInvMassScale& ims) : invMass0(FLoad(imass0 * ims.linear0)), invMass1(FLoad(imass1 * ims.linear1)), invInertiaScale0(FLoad(ims.angular0)), invInertiaScale1(FLoad(ims.angular1)) {} }; PX_FORCE_INLINE PxReal innerProduct(const Px1DConstraint& row0, const Px1DConstraint& row1, const PxVec4& row0AngSqrtInvInertia0, const PxVec4& row0AngSqrtInvInertia1, const PxVec4& row1AngSqrtInvInertia0, const PxVec4& row1AngSqrtInvInertia1, const MassProps& m) { const Vec3V l0 = V3Mul(V3Scale(V3LoadA(row0.linear0), m.invMass0), V3LoadA(row1.linear0)); const Vec3V l1 = V3Mul(V3Scale(V3LoadA(row0.linear1), m.invMass1), V3LoadA(row1.linear1)); const Vec4V r0ang0 = V4LoadA(&row0AngSqrtInvInertia0.x); const Vec4V r1ang0 = V4LoadA(&row1AngSqrtInvInertia0.x); const Vec4V r0ang1 = V4LoadA(&row0AngSqrtInvInertia1.x); const Vec4V r1ang1 = V4LoadA(&row1AngSqrtInvInertia1.x); const Vec3V i0 = V3ScaleAdd(V3Mul(Vec3V_From_Vec4V(r0ang0), Vec3V_From_Vec4V(r1ang0)), m.invInertiaScale0, l0); const Vec3V i1 = V3ScaleAdd(V3MulAdd(Vec3V_From_Vec4V(r0ang1), Vec3V_From_Vec4V(r1ang1), i0), m.invInertiaScale1, l1); PxF32 f; FStore(V3SumElems(i1), &f); return f; } // indexed rotation around axis, with sine and cosine of half-angle PX_FORCE_INLINE PxQuat indexedRotation(PxU32 axis, PxReal s, PxReal c) { PxQuat q(0,0,0,c); reinterpret_cast(&q)[axis] = s; return q; } // PT: TODO: refactor with duplicate in FdMathUtils.cpp PxQuat diagonalize(const PxMat33& m) // jacobi rotation using quaternions { const PxU32 MAX_ITERS = 5; PxQuat q(PxIdentity); PxMat33 d; for(PxU32 i=0; i < MAX_ITERS;i++) { const PxMat33Padded axes(q); d = axes.getTranspose() * m * axes; const PxReal d0 = PxAbs(d[1][2]), d1 = PxAbs(d[0][2]), d2 = PxAbs(d[0][1]); const PxU32 a = PxU32(d0 > d1 && d0 > d2 ? 0 : d1 > d2 ? 1 : 2); // rotation axis index, from largest off-diagonal element const PxU32 a1 = PxGetNextIndex3(a), a2 = PxGetNextIndex3(a1); if(d[a1][a2] == 0.0f || PxAbs(d[a1][a1] - d[a2][a2]) > 2e6f * PxAbs(2.0f * d[a1][a2])) break; const PxReal w = (d[a1][a1] - d[a2][a2]) / (2.0f * d[a1][a2]); // cot(2 * phi), where phi is the rotation angle const PxReal absw = PxAbs(w); PxQuat r; if(absw > 1000) r = indexedRotation(a, 1.0f / (4.0f * w), 1.0f); // h will be very close to 1, so use small angle approx instead else { const PxReal t = 1.0f / (absw + PxSqrt(w * w + 1.0f)); // absolute value of tan phi const PxReal h = 1.0f / PxSqrt(t * t + 1.0f); // absolute value of cos phi PX_ASSERT(h != 1); // |w|<1000 guarantees this with typical IEEE754 machine eps (approx 6e-8) r = indexedRotation(a, PxSqrt((1.0f - h) / 2.0f) * PxSign(w), PxSqrt((1.0f + h) / 2.0f)); } q = (q * r).getNormalized(); } return q; } PX_FORCE_INLINE void rescale(const Mat33V& m, PxVec3& a0, PxVec3& a1, PxVec3& a2) { const Vec3V va0 = V3LoadU(a0); const Vec3V va1 = V3LoadU(a1); const Vec3V va2 = V3LoadU(a2); const Vec3V b0 = V3ScaleAdd(va0, V3GetX(m.col0), V3ScaleAdd(va1, V3GetY(m.col0), V3Scale(va2, V3GetZ(m.col0)))); const Vec3V b1 = V3ScaleAdd(va0, V3GetX(m.col1), V3ScaleAdd(va1, V3GetY(m.col1), V3Scale(va2, V3GetZ(m.col1)))); const Vec3V b2 = V3ScaleAdd(va0, V3GetX(m.col2), V3ScaleAdd(va1, V3GetY(m.col2), V3Scale(va2, V3GetZ(m.col2)))); V3StoreU(b0, a0); V3StoreU(b1, a1); V3StoreU(b2, a2); } PX_FORCE_INLINE void rescale4(const Mat33V& m, PxReal* a0, PxReal* a1, PxReal* a2) { const Vec4V va0 = V4LoadA(a0); const Vec4V va1 = V4LoadA(a1); const Vec4V va2 = V4LoadA(a2); const Vec4V b0 = V4ScaleAdd(va0, V3GetX(m.col0), V4ScaleAdd(va1, V3GetY(m.col0), V4Scale(va2, V3GetZ(m.col0)))); const Vec4V b1 = V4ScaleAdd(va0, V3GetX(m.col1), V4ScaleAdd(va1, V3GetY(m.col1), V4Scale(va2, V3GetZ(m.col1)))); const Vec4V b2 = V4ScaleAdd(va0, V3GetX(m.col2), V4ScaleAdd(va1, V3GetY(m.col2), V4Scale(va2, V3GetZ(m.col2)))); V4StoreA(b0, a0); V4StoreA(b1, a1); V4StoreA(b2, a2); } void diagonalize(Px1DConstraint** row, PxVec4* angSqrtInvInertia0, PxVec4* angSqrtInvInertia1, const MassProps& m) { const PxReal a00 = innerProduct(*row[0], *row[0], angSqrtInvInertia0[0], angSqrtInvInertia1[0], angSqrtInvInertia0[0], angSqrtInvInertia1[0], m); const PxReal a01 = innerProduct(*row[0], *row[1], angSqrtInvInertia0[0], angSqrtInvInertia1[0], angSqrtInvInertia0[1], angSqrtInvInertia1[1], m); const PxReal a02 = innerProduct(*row[0], *row[2], angSqrtInvInertia0[0], angSqrtInvInertia1[0], angSqrtInvInertia0[2], angSqrtInvInertia1[2], m); const PxReal a11 = innerProduct(*row[1], *row[1], angSqrtInvInertia0[1], angSqrtInvInertia1[1], angSqrtInvInertia0[1], angSqrtInvInertia1[1], m); const PxReal a12 = innerProduct(*row[1], *row[2], angSqrtInvInertia0[1], angSqrtInvInertia1[1], angSqrtInvInertia0[2], angSqrtInvInertia1[2], m); const PxReal a22 = innerProduct(*row[2], *row[2], angSqrtInvInertia0[2], angSqrtInvInertia1[2], angSqrtInvInertia0[2], angSqrtInvInertia1[2], m); const PxMat33 a(PxVec3(a00, a01, a02), PxVec3(a01, a11, a12), PxVec3(a02, a12, a22)); const PxQuat q = diagonalize(a); const PxMat33 n(-q); const Mat33V mn(V3LoadU(n.column0), V3LoadU(n.column1), V3LoadU(n.column2)); //KS - We treat as a Vec4V so that we get geometricError rescaled for free along with linear0 rescale4(mn, &row[0]->linear0.x, &row[1]->linear0.x, &row[2]->linear0.x); rescale(mn, row[0]->linear1, row[1]->linear1, row[2]->linear1); //KS - We treat as a PxVec4 so that we get velocityTarget rescaled for free rescale4(mn, &row[0]->angular0.x, &row[1]->angular0.x, &row[2]->angular0.x); rescale(mn, row[0]->angular1, row[1]->angular1, row[2]->angular1); rescale4(mn, &angSqrtInvInertia0[0].x, &angSqrtInvInertia0[1].x, &angSqrtInvInertia0[2].x); rescale4(mn, &angSqrtInvInertia1[0].x, &angSqrtInvInertia1[1].x, &angSqrtInvInertia1[2].x); } // // A 1D constraint between two bodies (b0, b1) acts on specific linear and angular velocity // directions of these two bodies. Let the constrained linear velocity direction for body b0 // be l0 and the constrained angular velocity direction be a0. Likewise, let l1 and a1 be // the corresponding constrained velocity directions for body b1. // // Let the constraint Jacobian J be the 1x12 vector that combines the 3x1 vectors l0, a0, l1, a1 // J = | l0^T, a0^T, l1^T, a1^T | // // Let vl0, va0, vl1, va1 be the 3x1 linear/angular velocites of two bodies // and v be the 12x1 combination of those: // // | vl0 | // v = | va0 | // | vl1 | // | va1 | // // The constraint projected velocity scalar is then: // projV = J * v // // Let M be the 12x12 mass matrix (with scalar masses m0, m1 and 3x3 inertias I0, I1) // // | m0 | // | m0 | // | m0 | // | | | | // | | I0 | | // | | | | // | m1 | // | m1 | // | m1 | // | | | | // | | I1 | | // | | | | // // Let p be the impulse scalar that results from solving the 1D constraint given // projV, geometric error etc. // Turning this impulse p to a 12x1 delta velocity vector dv: // // dv = M^-1 * (J^T * p) = M^-1 * J^T * p // // Now to consider the case of multiple 1D constraints J0, J1, J2, ... operating // on the same body pair. // // Let K be the matrix holding these constraints as rows // // | J0 | // K = | J1 | // | J2 | // | ... | // // Applying these constraints: // // | | | p0 | // | dv0 dv1 dv2 ... | = M^-1 * K^T * | p1 | // | | | p2 | // // Let MK = (M^-1 * K^T)^T = K * M^-1 (M^-1 is symmetric). The transpose // is only used here to talk about constraint rows instead of columns (since // that expression seems to be used more commonly here). // // dvMatrix = MK^T * pMatrix // // The rows of MK define how an impulse affects the constrained velocity directions // of the two bodies. Ideally, the different constraint rows operate on independent // parts of the velocity such that constraints don't step on each others toe // (constraint A making the situation better with respect to its own constrained // velocity directions but worse for directions of constraint B and vice versa). // A formal way to specify this goal is to try to have the rows of MK be orthogonal // to each other, that is, to orthogonalize the MK matrix. This will eliminate // any action of a constraint in directions that have been touched by previous // constraints already. This re-configuration of constraint rows does not work in // general but for hard equality constraints (no spring, targetVelocity=0, // min/maxImpulse unlimited), changing the constraint rows to make them orthogonal // should not change the solution of the constraint problem. As an example, one // might consider a joint with two 1D constraints that lock linear movement in the // xy-plane (those are hard equality constraints). It's fine to choose different // constraint directions from the ones provided, assuming the new directions are // still in the xy-plane and that the geometric errors get patched up accordingly. // // \param[in,out] row Pointers to the constraints to orthogonalize. The members // linear0/1, angular0/1, geometricError and velocityTarget will // get changed potentially // \param[in,out] angSqrtInvInertia0 body b0 angular velocity directions of the // constraints provided in parameter row but // multiplied by the square root ot the inverse // inertia tensor of body b0. // I0^(-1/2) * angularDirection0, ... // Will be replaced by the orthogonalized vectors. // Note: the fourth component of the vectors serves // no purpose in this method. // \param[in,out] angSqrtInvInertia1 Same as previous parameter but for body b1. // \param[in] rowCount Number of entries in row, angSqrtInvInertia0, angSqrtInvInertia1 // \param[in] eqRowCount Number of entries in row that represent equality constraints. // The method expects the entries in row to be sorted by equality // constraints first, followed by inequality constraints. The // latter get orthogonalized relative to the equality constraints // but not relative to the other inequality constraints. // \param[in] m Some mass properties of the two bodies b0, b1. // void orthogonalize(Px1DConstraint** row, PxVec4* angSqrtInvInertia0, PxVec4* angSqrtInvInertia1, PxU32 rowCount, PxU32 eqRowCount, const MassProps &m) { PX_ASSERT(eqRowCount<=6); const FloatV zero = FZero(); Vec3V lin1m[6], ang1m[6], lin1[6], ang1[6]; Vec4V lin0m[6], ang0m[6]; // must have 0 in the W-field Vec4V lin0AndG[6], ang0AndT[6]; for(PxU32 i=0;ilinear0.x); // linear0 and geometric error Vec4V a0AndT = V4LoadA(&row[i]->angular0.x); // angular0 and velocity target Vec3V l1 = Vec3V_From_Vec4V(V4LoadA(&row[i]->linear1.x)); Vec3V a1 = Vec3V_From_Vec4V(V4LoadA(&row[i]->angular1.x)); Vec4V angSqrtL0 = V4LoadA(&angSqrtInvInertia0[i].x); Vec4V angSqrtL1 = V4LoadA(&angSqrtInvInertia1[i].x); const PxU32 eliminationRows = PxMin(i, eqRowCount); for(PxU32 j=0;j----------------> // proj_u(v) u // // Let be the dot/inner product of the two vectors v and u. // // proj_u(v) = * normalize(u) // = * (u/|u|) = / (|u|*|u|) * u // = / * u // // The implementation here maps as follows: // // u = [orthoLinear0, orthoAngular0, orthoLinear1, orthoAngular1] // v = [row[]->linear0, row[]->angular0, row[]->linear1, row[]->angular1] // // Since the solver is using momocity, orthogonality should not be achieved for rows // M^-1 * u but for rows uM = M^(-1/2) * u (with M^(-1/2) being the square root of the // inverse mass matrix). Following the described orthogonalization procedure to turn // v1m into u1m that is orthogonal to u0m: // // u1M = v1M - proj_u0M(v1M) // M^(-1/2) * u1 = M^(-1/2) * v1 - / * M^(-1/2) * u0 // // Since M^(-1/2) is multiplied on the left and right hand side, this can be transformed to: // // u1 = v1 - / * u0 // // For the computation of /, the following shall be considered: // // // = // = (M^(-1/2) * v)^T * (M^(-1/2) * u) (v and u being seen as 12x1 vectors here) // = v^T * M^(-1/2)^T * M^(-1/2) * u // = v^T * M^-1 * u (M^(-1/2) is a symmetric matrix, thus transposing has no effect) // = // // Applying this: // // / = / // // The code uses: // // v1m_ = [v1Lin0, I0^(-1/2) * v1Ang0, v1Lin1, I1^(-1/2) * v1Ang1] // u0m_ = [(1/m0) * u0Lin0, I0^(-1/2) * u0Ang0, (1/m1) * u0Lin1, I1^(-1/2) * u0Ang1] // u0m* = u0m_ / (see variables named lin0m, ang0m, lin1m, ang1m in the code) // // And then does: // // = / = / = / // (see variable named t in the code) // // note: u0, u1, ... get computed for equality constraints. Inequality constraints do not generate new // "base" vectors. Let's say u0, u1 are from equality constraints, then for inequality constraints // u2, u3: // // u2 = v2 - proj_u0(v2) - proj_u1(v2) // u3 = v3 - proj_u0(v3) - proj_u1(v3) // // in other words: the inequality constraints will be orthogonal to the equality constraints but not // to other inequality constraints. // const Vec3V s0 = V3MulAdd(l1, lin1m[j], Vec3V_From_Vec4V_WUndefined(V4Mul(l0AndG, lin0m[j]))); const Vec3V s1 = V3MulAdd(Vec3V_From_Vec4V_WUndefined(angSqrtL1), ang1m[j], Vec3V_From_Vec4V_WUndefined(V4Mul(angSqrtL0, ang0m[j]))); const FloatV t = V3SumElems(V3Add(s0, s1)); l0AndG = V4NegScaleSub(lin0AndG[j], t, l0AndG); // note: this can reduce the error term by the amount covered by the orthogonal base vectors a0AndT = V4NegScaleSub(ang0AndT[j], t, a0AndT); // note: for equality and inequality constraints, target velocity is expected to be 0 l1 = V3NegScaleSub(lin1[j], t, l1); a1 = V3NegScaleSub(ang1[j], t, a1); angSqrtL0 = V4NegScaleSub(V4LoadA(&angSqrtInvInertia0[j].x), t, angSqrtL0); angSqrtL1 = V4NegScaleSub(V4LoadA(&angSqrtInvInertia1[j].x), t, angSqrtL1); // note: angSqrtL1 is equivalent to I1^(-1/2) * a1 (and same goes for angSqrtL0) } V4StoreA(l0AndG, &row[i]->linear0.x); V4StoreA(a0AndT, &row[i]->angular0.x); V3StoreA(l1, row[i]->linear1); V3StoreA(a1, row[i]->angular1); V4StoreA(angSqrtL0, &angSqrtInvInertia0[i].x); V4StoreA(angSqrtL1, &angSqrtInvInertia1[i].x); if(i= (sizeof(PxVec3) + sizeof(PxU32))); //PX_COMPILE_TIME_ASSERT((sizeof(Px1DConstraint) - PX_OFFSET_OF_RT(Px1DConstraint, angular1)) >= (sizeof(PxVec3) + sizeof(PxU32))); // PT: TODO: move somewhere else PX_FORCE_INLINE Vec3V M33MulV4(const Mat33V& a, const Vec4V b) { const FloatV x = V4GetX(b); const FloatV y = V4GetY(b); const FloatV z = V4GetZ(b); const Vec3V v0 = V3Scale(a.col0, x); const Vec3V v1 = V3Scale(a.col1, y); const Vec3V v2 = V3Scale(a.col2, z); const Vec3V v0PlusV1 = V3Add(v0, v1); return V3Add(v0PlusV1, v2); } void preprocessRows(Px1DConstraint** sorted, Px1DConstraint* rows, PxVec4* angSqrtInvInertia0, PxVec4* angSqrtInvInertia1, PxU32 rowCount, const PxMat33& sqrtInvInertia0F32, const PxMat33& sqrtInvInertia1F32, const PxReal invMass0, const PxReal invMass1, const PxConstraintInvMassScale& ims, bool disablePreprocessing, bool diagonalizeDrive) { // j is maxed at 12, typically around 7, so insertion sort is fine for(PxU32 i=0; i0 && r->solveHint < sorted[j-1]->solveHint; j--) sorted[j] = sorted[j-1]; sorted[j] = r; } for(PxU32 i=0;isolveHint <= sorted[i+1]->solveHint); // PT: it is always safe to use "V3LoadU_SafeReadW" on the two first columns of a PxMat33. However in this case the passed matrices // come from PxSolverBodyData::sqrtInvInertia (PGS) or PxTGSSolverBodyTxInertia::sqrtInvInertia (TGS). It is currently unsafe to use // V3LoadU_SafeReadW in the TGS case (the matrix is the last element of the structure). So we keep V3LoadU here for now. For PGS we // have a compile-time-assert (see PxSolverBodyData struct) to ensure safe reads on sqrtInvInertia. // Note that because we only use this in M33MulV4 below, the ClearW calls could also be skipped. const Mat33V sqrtInvInertia0 = Mat33V( V3LoadU_SafeReadW(sqrtInvInertia0F32.column0), V3LoadU_SafeReadW(sqrtInvInertia0F32.column1), V3LoadU(sqrtInvInertia0F32.column2)); const Mat33V sqrtInvInertia1 = Mat33V( V3LoadU_SafeReadW(sqrtInvInertia1F32.column0), V3LoadU_SafeReadW(sqrtInvInertia1F32.column1), V3LoadU(sqrtInvInertia1F32.column2)); PX_ASSERT(((uintptr_t(angSqrtInvInertia0)) & 0xF) == 0); PX_ASSERT(((uintptr_t(angSqrtInvInertia1)) & 0xF) == 0); for(PxU32 i=0; iangular0)); //const Vec3V angDelta1_ = M33MulV3(sqrtInvInertia1, V3LoadU(sorted[i]->angular1)); const Vec3V angDelta0 = M33MulV4(sqrtInvInertia0, V4LoadA(&sorted[i]->angular0.x)); const Vec3V angDelta1 = M33MulV4(sqrtInvInertia1, V4LoadA(&sorted[i]->angular1.x)); V4StoreA(Vec4V_From_Vec3V(angDelta0), &angSqrtInvInertia0[i].x); V4StoreA(Vec4V_From_Vec3V(angDelta1), &angSqrtInvInertia1[i].x); } if(disablePreprocessing) return; MassProps m(invMass0, invMass1, ims); for(PxU32 i=0;isolveHint>>8), start = i++; while(isolveHint>>8) == groupMajorId) i++; if(groupMajorId == 4 || (groupMajorId == 8)) { // // PGS: // - make all equality constraints orthogonal to each other // - make all inequality constraints orthogonal to all equality constraints // This assumes only PxConstraintSolveHint::eEQUALITY and ::eINEQUALITY is used. // // TGS: // - make all linear equality constraints orthogonal to each other // - make all linear inequality constraints orthogonal to all linear equality constraints // - make all angular equality constraints orthogonal to each other // - make all angular inequality constraints orthogonal to all angular equality constraints // This is achieved by internally turning PxConstraintSolveHint::eEQUALITY and ::eINEQUALITY into // ::eROTATIONAL_EQUALITY and ::eROTATIONAL_INEQUALITY for angular constraints. // PxU32 bCount = start; // count of bilateral constraints for(; bCountsolveHint&255)==0; bCount++) ; orthogonalize(sorted+start, angSqrtInvInertia0+start, angSqrtInvInertia1+start, i-start, bCount-start, m); } if(groupMajorId == 1 && diagonalizeDrive) { PxU32 slerp = start; // count of bilateral constraints for(; slerpsolveHint&255)!=2; slerp++) ; if(slerp+3 == i) diagonalize(sorted+slerp, angSqrtInvInertia0+slerp, angSqrtInvInertia1+slerp, m); PX_ASSERT(i-start==3); diagonalize(sorted+start, angSqrtInvInertia0+start, angSqrtInvInertia1+start, m); } } } PxU32 ConstraintHelper::setupSolverConstraint( PxSolverConstraintPrepDesc& prepDesc, PxConstraintAllocator& allocator, PxReal simDt, PxReal recipSimDt) { if (prepDesc.numRows == 0) { prepDesc.desc->constraint = NULL; prepDesc.desc->writeBack = NULL; prepDesc.desc->constraintLengthOver16 = 0; return 0; } PxSolverConstraintDesc& desc = *prepDesc.desc; const bool isExtended = (desc.linkIndexA != PxSolverConstraintDesc::RIGID_BODY) || (desc.linkIndexB != PxSolverConstraintDesc::RIGID_BODY); const PxU32 stride = isExtended ? sizeof(SolverConstraint1DExt) : sizeof(SolverConstraint1D); const PxU32 constraintLength = sizeof(SolverConstraint1DHeader) + stride * prepDesc.numRows; //KS - +16 is for the constraint progress counter, which needs to be the last element in the constraint (so that we //know SPU DMAs have completed) PxU8* ptr = allocator.reserveConstraintData(constraintLength + 16u); if(!checkConstraintDataPtr(ptr)) return 0; desc.constraint = ptr; setConstraintLength(desc,constraintLength); desc.writeBack = prepDesc.writeback; PxMemSet(desc.constraint, 0, constraintLength); SolverConstraint1DHeader* header = reinterpret_cast(desc.constraint); PxU8* constraints = desc.constraint + sizeof(SolverConstraint1DHeader); init(*header, PxTo8(prepDesc.numRows), isExtended, prepDesc.invMassScales); header->body0WorldOffset = prepDesc.body0WorldOffset; header->linBreakImpulse = prepDesc.linBreakForce * simDt; header->angBreakImpulse = prepDesc.angBreakForce * simDt; header->breakable = PxU8((prepDesc.linBreakForce != PX_MAX_F32) || (prepDesc.angBreakForce != PX_MAX_F32)); header->invMass0D0 = prepDesc.data0->invMass * prepDesc.invMassScales.linear0; header->invMass1D1 = prepDesc.data1->invMass * prepDesc.invMassScales.linear1; PX_ALIGN(16, PxVec4) angSqrtInvInertia0[MAX_CONSTRAINT_ROWS]; PX_ALIGN(16, PxVec4) angSqrtInvInertia1[MAX_CONSTRAINT_ROWS]; Px1DConstraint* sorted[MAX_CONSTRAINT_ROWS]; preprocessRows(sorted, prepDesc.rows, angSqrtInvInertia0, angSqrtInvInertia1, prepDesc.numRows, prepDesc.data0->sqrtInvInertia, prepDesc.data1->sqrtInvInertia, prepDesc.data0->invMass, prepDesc.data1->invMass, prepDesc.invMassScales, isExtended || prepDesc.disablePreprocessing, prepDesc.improvedSlerp); const PxReal erp = 1.0f; PxU32 outCount = 0; const SolverExtBody eb0(reinterpret_cast(prepDesc.body0), prepDesc.data0, desc.linkIndexA); const SolverExtBody eb1(reinterpret_cast(prepDesc.body1), prepDesc.data1, desc.linkIndexB); PxReal cfm = 0.f; if (isExtended) { cfm = PxMax(eb0.getCFM(), eb1.getCFM()); } for (PxU32 i = 0; i(constraints); Px1DConstraint& c = *sorted[i]; PxReal minImpulse, maxImpulse; computeMinMaxImpulseOrForceAsImpulse( c.minImpulse, c.maxImpulse, c.flags & Px1DConstraintFlag::eHAS_DRIVE_LIMIT, prepDesc.driveLimitsAreForces, simDt, minImpulse, maxImpulse); PxReal unitResponse; PxReal jointSpeedForRestitutionBounce = 0.0f; PxReal initJointSpeed = 0.0f; const PxReal minResponseThreshold = prepDesc.minResponseThreshold; if(!isExtended) { //The theoretical formulation of the Jacobian J has 4 terms {linear0, angular0, linear1, angular1} //s.lin0 and s.lin1 match J.linear0 and J.linear1. //We compute s.ang0 and s.ang1 but these *must not* be confused with the angular terms of the theoretical Jacobian. //s.ang0 and s.ang1 are part of the momocity system that computes deltas to the angular motion that are neither //angular momentum nor angular velocity. //s.ang0 = I0^(-1/2) * J.angular0 with I0 denoting the inertia of body0 in the world frame. //s.ang1 = I1^(-1/2) * J.angular1 with I1 denoting the inertia of body1 in the world frame. //We then compute the unit response r = J * M^-1 * JTranspose with M denoting the mass matrix. //r = (1/m0)*|J.linear0|^2 + (1/m1)*|J.linear1|^2 + J.angular0 * I0^-1 * J.angular0 + J.angular1 * I1^-1 * J.angular1 //We can write out the term [J.angular0 * I0^-1 * J.angular0] in a different way: //J.angular0 * I0^-1 * J.angular0 = [J.angular0 * I0^(-1/2)] dot [I0^(-1/2) * J.angular0] //Noting that s.ang0 = J.angular0 * I0^(-1/2) and the equivalent expression for body 1, we have the following: //r = (1/m0)*|s.lin0|^2 + (1/m1)*|s.lin1|^2 + |s.ang0|^2 + |s.ang1|^2 //Init vel is computed using the standard Jacobian method because at this stage we have linear and angular velocities. //The code that resolves the constraints instead accumulates delta linear velocities and delta angular momocities compatible with //s.lin0/lin1 and s.ang0/ang1. Right now, though, we have only linear and angular velocities compatible with the theoretical //Jacobian form: //initVel = J.linear0.dot(linVel0) + J.angular0.dot(angvel0) - J.linear1.dot(linVel1) - J.angular1.dot(angvel1) init(s, c.linear0, c.linear1, PxVec3(angSqrtInvInertia0[i].x, angSqrtInvInertia0[i].y, angSqrtInvInertia0[i].z), PxVec3(angSqrtInvInertia1[i].x, angSqrtInvInertia1[i].y, angSqrtInvInertia1[i].z), minImpulse, maxImpulse); s.ang0Writeback = c.angular0; const PxReal resp0 = s.lin0.magnitudeSquared() * prepDesc.data0->invMass * prepDesc.invMassScales.linear0 + s.ang0.magnitudeSquared() * prepDesc.invMassScales.angular0; const PxReal resp1 = s.lin1.magnitudeSquared() * prepDesc.data1->invMass * prepDesc.invMassScales.linear1 + s.ang1.magnitudeSquared() * prepDesc.invMassScales.angular1; unitResponse = resp0 + resp1; initJointSpeed = jointSpeedForRestitutionBounce = prepDesc.data0->projectVelocity(c.linear0, c.angular0) - prepDesc.data1->projectVelocity(c.linear1, c.angular1); } else { //this is articulation/deformable volume init(s, c.linear0, c.linear1, c.angular0, c.angular1, minImpulse, maxImpulse); SolverConstraint1DExt& e = static_cast(s); const Cm::SpatialVector resp0 = createImpulseResponseVector(e.lin0, e.ang0, eb0); const Cm::SpatialVector resp1 = createImpulseResponseVector(-e.lin1, -e.ang1, eb1); unitResponse = getImpulseResponse(eb0, resp0, unsimdRef(e.deltaVA), prepDesc.invMassScales.linear0, prepDesc.invMassScales.angular0, eb1, resp1, unsimdRef(e.deltaVB), prepDesc.invMassScales.linear1, prepDesc.invMassScales.angular1, false); //Add CFM term! if(unitResponse <= DY_ARTICULATION_MIN_RESPONSE) continue; unitResponse += cfm; s.ang0Writeback = c.angular0; s.lin0 = resp0.linear; s.ang0 = resp0.angular; s.lin1 = -resp1.linear; s.ang1 = -resp1.angular; PxReal vel0, vel1; const bool b0IsRigidDynamic = (eb0.mLinkIndex == PxSolverConstraintDesc::RIGID_BODY); const bool b1IsRigidDynamic = (eb1.mLinkIndex == PxSolverConstraintDesc::RIGID_BODY); if(needsNormalVel(c) || b0IsRigidDynamic || b1IsRigidDynamic) { vel0 = eb0.projectVelocity(c.linear0, c.angular0); vel1 = eb1.projectVelocity(c.linear1, c.angular1); Dy::computeJointSpeedPGS(vel0, b0IsRigidDynamic, vel1, b1IsRigidDynamic, jointSpeedForRestitutionBounce, initJointSpeed); } //minResponseThreshold = PxMax(minResponseThreshold, DY_ARTICULATION_MIN_RESPONSE); } const PxReal recipUnitResponse = computeRecipUnitResponse(unitResponse, minResponseThreshold); s.setSolverConstants( compute1dConstraintSolverConstantsPGS( c.flags, c.mods.spring.stiffness, c.mods.spring.damping, c.mods.bounce.restitution, c.mods.bounce.velocityThreshold, c.geometricError, c.velocityTarget, jointSpeedForRestitutionBounce, initJointSpeed, unitResponse, recipUnitResponse, erp, simDt, recipSimDt)); if(c.flags & Px1DConstraintFlag::eOUTPUT_FORCE) s.setOutputForceFlag(true); outCount++; constraints += stride; } //Reassign count to the header because we may have skipped some rows if they were degenerate header->count = PxU8(outCount); return prepDesc.numRows; } PxU32 SetupSolverConstraint(SolverConstraintShaderPrepDesc& shaderDesc, PxSolverConstraintPrepDesc& prepDesc, PxConstraintAllocator& allocator, PxReal dt, PxReal invdt) { // LL shouldn't see broken constraints PX_ASSERT(!(reinterpret_cast(prepDesc.writeback)->isBroken())); setConstraintLength(*prepDesc.desc, 0); if (!shaderDesc.solverPrep) return 0; //PxU32 numAxisConstraints = 0; Px1DConstraint rows[MAX_CONSTRAINT_ROWS]; setupConstraintRows(rows, MAX_CONSTRAINT_ROWS); prepDesc.invMassScales.linear0 = prepDesc.invMassScales.linear1 = prepDesc.invMassScales.angular0 = prepDesc.invMassScales.angular1 = 1.0f; prepDesc.body0WorldOffset = PxVec3(0.0f); PxVec3p unused_ra, unused_rb; //TAG::solverprepcall prepDesc.numRows = prepDesc.disableConstraint ? 0 : (*shaderDesc.solverPrep)(rows, prepDesc.body0WorldOffset, MAX_CONSTRAINT_ROWS, prepDesc.invMassScales, shaderDesc.constantBlock, prepDesc.bodyFrame0, prepDesc.bodyFrame1, prepDesc.extendedLimits, unused_ra, unused_rb); prepDesc.rows = rows; return ConstraintHelper::setupSolverConstraint(prepDesc, allocator, dt, invdt); } } }