3639 lines
156 KiB
C++
3639 lines
156 KiB
C++
// Redistribution and use in source and binary forms, with or without
|
|
// modification, are permitted provided that the following conditions
|
|
// are met:
|
|
// * Redistributions of source code must retain the above copyright
|
|
// notice, this list of conditions and the following disclaimer.
|
|
// * Redistributions in binary form must reproduce the above copyright
|
|
// notice, this list of conditions and the following disclaimer in the
|
|
// documentation and/or other materials provided with the distribution.
|
|
// * Neither the name of NVIDIA CORPORATION nor the names of its
|
|
// contributors may be used to endorse or promote products derived
|
|
// from this software without specific prior written permission.
|
|
//
|
|
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
|
|
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
|
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
|
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
|
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
|
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
|
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
|
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
//
|
|
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
|
|
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
|
|
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
|
|
|
|
#include "foundation/PxPreprocessor.h"
|
|
#include "foundation/PxVecMath.h"
|
|
#include "PxcNpWorkUnit.h"
|
|
#include "PxcNpContactPrepShared.h"
|
|
#include "DyTGSDynamics.h"
|
|
#include "DyCpuGpu1dConstraint.h"
|
|
#include "DyAllocator.h"
|
|
|
|
using namespace physx;
|
|
using namespace Gu;
|
|
|
|
#include "PxsMaterialManager.h"
|
|
#include "DyContactPrepShared.h"
|
|
#include "DyConstraintPrep.h"
|
|
#include "DyTGS.h"
|
|
#include "DySolverContext.h"
|
|
|
|
namespace physx
|
|
{
|
|
namespace Dy
|
|
{
|
|
inline bool ValidateVec4(const Vec4V v)
|
|
{
|
|
PX_ALIGN(16, PxVec4 vF);
|
|
aos::V4StoreA(v, &vF.x);
|
|
return vF.isFinite();
|
|
}
|
|
|
|
// Evaluates the scalar quaternion-rotation formula
//
//     const PxVec3 qv(x,y,z);
//     return (v*(w*w-0.5f) + (qv.cross(v))*w + qv*(qv.dot(v)))*2;
//
// on component-wise Vec4V inputs: (qx, qy, qz, qw) are the quaternion components,
// (vx, vy, vz) the vector components, and the rotated components are written to
// (rX, rY, rZ). All outputs are written only after every intermediate has been computed.
PX_FORCE_INLINE void QuatRotate4(const Vec4VArg qx, const Vec4VArg qy, const Vec4VArg qz, const Vec4VArg qw, const Vec4VArg vx, const Vec4VArg vy, const Vec4VArg vz,
	Vec4V& rX, Vec4V& rY, Vec4V& rZ)
{
	const Vec4V twoV = V4Splat(FLoad(2.f));
	const Vec4V minusHalf = V4Splat(FLoad(-0.5f));

	// w*w - 0.5
	const Vec4V wSqMinusHalf = V4MulAdd(qw, qw, minusHalf);

	// v * (w*w - 0.5)
	const Vec4V scaledVx = V4Mul(vx, wSqMinusHalf);
	const Vec4V scaledVy = V4Mul(vy, wSqMinusHalf);
	const Vec4V scaledVz = V4Mul(vz, wSqMinusHalf);

	// qv x v
	const Vec4V crossX = V4NegMulSub(qz, vy, V4Mul(qy, vz));
	const Vec4V crossY = V4NegMulSub(qx, vz, V4Mul(qz, vx));
	const Vec4V crossZ = V4NegMulSub(qy, vx, V4Mul(qx, vy));

	// (qv x v) * w + v * (w*w - 0.5)
	const Vec4V partialX = V4MulAdd(crossX, qw, scaledVx);
	const Vec4V partialY = V4MulAdd(crossY, qw, scaledVy);
	const Vec4V partialZ = V4MulAdd(crossZ, qw, scaledVz);

	// qv . v, accumulated x first, then y, then z
	const Vec4V qDotV = V4MulAdd(qz, vz, V4MulAdd(qy, vy, V4Mul(qx, vx)));

	// (partial + qv * (qv . v)) * 2
	rX = V4Mul(V4MulAdd(qx, qDotV, partialX), twoV);
	rY = V4Mul(V4MulAdd(qy, qDotV, partialY), twoV);
	rZ = V4Mul(V4MulAdd(qz, qDotV, partialZ), twoV);
}
|
|
|
|
struct SolverContactHeaderStepBlock
|
|
{
|
|
enum
|
|
{
|
|
eHAS_MAX_IMPULSE = 1 << 0,
|
|
eHAS_TARGET_VELOCITY = 1 << 1
|
|
};
|
|
|
|
PxU8 type; //Note: mType should be first as the solver expects a type in the first byte.
|
|
PxU8 numNormalConstr;
|
|
PxU8 numFrictionConstr;
|
|
PxU8 flag;
|
|
|
|
PxU8 flags[4];
|
|
|
|
//KS - used for write-back only
|
|
PxU8 numNormalConstrs[4];
|
|
PxU8 numFrictionConstrs[4];
|
|
|
|
//Vec4V restitution;
|
|
Vec4V staticFriction;
|
|
Vec4V dynamicFriction;
|
|
//Technically, these mass properties could be pulled out into a new structure and shared. For multi-manifold contacts,
|
|
//this would save 64 bytes per-manifold after the cost of the first manifold
|
|
Vec4V invMass0D0;
|
|
Vec4V invMass1D1;
|
|
Vec4V angDom0;
|
|
Vec4V angDom1;
|
|
//Normal is shared between all contacts in the batch. This will save some memory!
|
|
Vec4V normalX;
|
|
Vec4V normalY;
|
|
Vec4V normalZ;
|
|
|
|
Vec4V maxPenBias;
|
|
|
|
Sc::ShapeInteraction* shapeInteraction[4]; //192 or 208
|
|
|
|
BoolV broken;
|
|
PxU8* frictionBrokenWritebackByte[4];
|
|
};
|
|
|
|
struct SolverContactPointStepBlock
|
|
{
|
|
Vec4V raXnI[3];
|
|
Vec4V rbXnI[3];
|
|
Vec4V separation;
|
|
Vec4V velMultiplier;
|
|
Vec4V targetVelocity;
|
|
Vec4V biasCoefficient;
|
|
Vec4V recipResponse;
|
|
};
|
|
|
|
//KS - technically, this friction constraint has identical data to the above contact constraint.
|
|
//We make them separate structs for clarity
|
|
struct SolverContactFrictionStepBlock
|
|
{
|
|
Vec4V normal[3];
|
|
Vec4V raXnI[3];
|
|
Vec4V rbXnI[3];
|
|
Vec4V error;
|
|
Vec4V velMultiplier;
|
|
Vec4V targetVel;
|
|
Vec4V biasCoefficient;
|
|
};
|
|
|
|
struct SolverConstraint1DHeaderStep4
|
|
{
|
|
PxU8 type; // enum SolverConstraintType - must be first byte
|
|
PxU8 pad0[3];
|
|
//These counts are the max of the 4 sets of data.
|
|
//When certain pairs have fewer constraints than others, they are padded with 0s so that no work is performed but
|
|
//calculations are still shared (afterall, they're computationally free because we're doing 4 things at a time in SIMD)
|
|
PxU32 count;
|
|
PxU8 counts[4];
|
|
PxU8 breakable[4];
|
|
|
|
Vec4V linBreakImpulse;
|
|
Vec4V angBreakImpulse;
|
|
Vec4V invMass0D0;
|
|
Vec4V invMass1D1;
|
|
Vec4V angD0;
|
|
Vec4V angD1;
|
|
|
|
Vec4V body0WorkOffset[3];
|
|
Vec4V rAWorld[3];
|
|
Vec4V rBWorld[3];
|
|
|
|
Vec4V angOrthoAxis0X[3];
|
|
Vec4V angOrthoAxis0Y[3];
|
|
Vec4V angOrthoAxis0Z[3];
|
|
Vec4V angOrthoAxis1X[3];
|
|
Vec4V angOrthoAxis1Y[3];
|
|
Vec4V angOrthoAxis1Z[3];
|
|
Vec4V angOrthoRecipResponse[3];
|
|
Vec4V angOrthoError[3];
|
|
};
|
|
|
|
// One batched 1D constraint row; the struct is 16-byte aligned and built from Vec4V members
// plus a trailing array of four flag words. Member order defines the layout - do not reorder.
PX_ALIGN_PREFIX(16)
struct SolverConstraint1DStep4
{
	Vec4V lin0[3];				//!< linear velocity projection (body 0)
	Vec4V error;				//!< constraint error term - must be scaled by biasScale. Can be adjusted at run-time

	Vec4V lin1[3];				//!< linear velocity projection (body 1)
	Vec4V biasScale;			//!< constraint constant bias scale. Constant

	Vec4V ang0[3];				//!< angular velocity projection (body 0)
	Vec4V velMultiplier;		//!< constraint velocity multiplier

	Vec4V ang1[3];				//!< angular velocity projection (body 1)

	Vec4V velTarget;			//!< Scaled target velocity of the constraint drive

	Vec4V minImpulse;			//!< Lower bound on impulse magnitude
	Vec4V maxImpulse;			//!< Upper bound on impulse magnitude
	Vec4V appliedForce;			//!< applied force to correct velocity+bias

	Vec4V maxBias;
	Vec4V angularErrorScale;	//!< Constant
	PxU32 flags[4];
} PX_ALIGN_SUFFIX(16);
|
|
|
|
// SolverConstraint1DStep4 extended with two extra Vec4V residual slots appended after the
// base row data. NOTE(review): judging by the names these hold residuals for the velocity
// and position iterations respectively - confirm against the solver code that writes them.
PX_ALIGN_PREFIX(16)
struct SolverConstraint1DStep4WithResidual : public SolverConstraint1DStep4
{
	Vec4V residualVelIter;
	Vec4V residualPosIter;
} PX_ALIGN_SUFFIX(16);
|
|
|
|
static void setupFinalizeSolverConstraints4Step(PxTGSSolverContactDesc* PX_RESTRICT descs, CorrelationBuffer& c,
|
|
PxU8* PX_RESTRICT workspace, PxReal invDtF32, PxReal totalDtF32, PxReal invTotalDtF32,
|
|
PxReal dtF32, PxReal bounceThresholdF32, PxReal biasCoefficient,
|
|
const aos::Vec4VArg invMassScale0, const aos::Vec4VArg invInertiaScale0,
|
|
const aos::Vec4VArg invMassScale1, const aos::Vec4VArg invInertiaScale1)
|
|
{
|
|
//OK, we have a workspace of pre-allocated space to store all 4 descs in. We now need to create the constraints in it
|
|
|
|
//const Vec4V ccdMaxSeparation = aos::V4LoadXYZW(descs[0].maxCCDSeparation, descs[1].maxCCDSeparation, descs[2].maxCCDSeparation, descs[3].maxCCDSeparation);
|
|
const Vec4V solverOffsetSlop = aos::V4LoadXYZW(descs[0].offsetSlop, descs[1].offsetSlop, descs[2].offsetSlop, descs[3].offsetSlop);
|
|
|
|
const Vec4V zero = V4Zero();
|
|
const Vec4V one = V4One();
|
|
const BoolV bFalse = BFFFF();
|
|
const BoolV bTrue = BTTTT();
|
|
const FloatV fZero = FZero();
|
|
|
|
PxU8 flags[4] = { PxU8(descs[0].hasForceThresholds ? SolverContactHeader::eHAS_FORCE_THRESHOLDS : 0),
|
|
PxU8(descs[1].hasForceThresholds ? SolverContactHeader::eHAS_FORCE_THRESHOLDS : 0),
|
|
PxU8(descs[2].hasForceThresholds ? SolverContactHeader::eHAS_FORCE_THRESHOLDS : 0),
|
|
PxU8(descs[3].hasForceThresholds ? SolverContactHeader::eHAS_FORCE_THRESHOLDS : 0) };
|
|
|
|
const bool hasMaxImpulse = descs[0].hasMaxImpulse || descs[1].hasMaxImpulse || descs[2].hasMaxImpulse || descs[3].hasMaxImpulse;
|
|
|
|
//The block is dynamic if **any** of the constraints have a non-static body B. This allows us to batch static and non-static constraints but we only get a memory/perf
|
|
//saving if all 4 are static. This simplifies the constraint partitioning such that it only needs to care about separating contacts and 1D constraints (which it already does)
|
|
bool isDynamic = false;
|
|
bool hasKinematic = false;
|
|
|
|
PxReal kinematicScale0F32[4];
|
|
PxReal kinematicScale1F32[4];
|
|
|
|
for (PxU32 a = 0; a < 4; ++a)
|
|
{
|
|
isDynamic = isDynamic || (descs[a].bodyState1 == PxSolverContactDesc::eDYNAMIC_BODY);
|
|
hasKinematic = hasKinematic || descs[a].bodyState1 == PxSolverContactDesc::eKINEMATIC_BODY;
|
|
kinematicScale0F32[a] = descs[a].body0->isKinematic ? 1.f : 0.f;
|
|
kinematicScale1F32[a] = descs[a].body1->isKinematic ? 1.f : 0.f;
|
|
}
|
|
|
|
/*BoolV kinematic0 = BLoad(isKinematic0);
|
|
BoolV kinematic1 = BLoad(isKinematic1);*/
|
|
|
|
const Vec4V kinematicScale0 = V4LoadU(kinematicScale0F32);
|
|
const Vec4V kinematicScale1 = V4LoadU(kinematicScale1F32);
|
|
|
|
const PxU32 constraintSize = sizeof(SolverContactPointStepBlock);
|
|
const PxU32 frictionSize = sizeof(SolverContactFrictionStepBlock);
|
|
|
|
PxU8* PX_RESTRICT ptr = workspace;
|
|
|
|
const Vec4V dom0 = invMassScale0;
|
|
const Vec4V dom1 = invMassScale1;
|
|
const Vec4V angDom0 = invInertiaScale0;
|
|
const Vec4V angDom1 = invInertiaScale1;
|
|
|
|
const Vec4V maxPenBias = V4Max(V4LoadXYZW(descs[0].bodyData0->penBiasClamp, descs[1].bodyData0->penBiasClamp,
|
|
descs[2].bodyData0->penBiasClamp, descs[3].bodyData0->penBiasClamp),
|
|
V4LoadXYZW(descs[0].bodyData1->penBiasClamp, descs[1].bodyData1->penBiasClamp,
|
|
descs[2].bodyData1->penBiasClamp, descs[3].bodyData1->penBiasClamp));
|
|
|
|
const Vec4V restDistance = V4LoadXYZW(descs[0].restDistance, descs[1].restDistance, descs[2].restDistance,
|
|
descs[3].restDistance);
|
|
|
|
//load up velocities
|
|
Vec4V linVel00 = V4LoadA(&descs[0].bodyData0->originalLinearVelocity.x);
|
|
Vec4V linVel10 = V4LoadA(&descs[1].bodyData0->originalLinearVelocity.x);
|
|
Vec4V linVel20 = V4LoadA(&descs[2].bodyData0->originalLinearVelocity.x);
|
|
Vec4V linVel30 = V4LoadA(&descs[3].bodyData0->originalLinearVelocity.x);
|
|
|
|
Vec4V linVel01 = V4LoadA(&descs[0].bodyData1->originalLinearVelocity.x);
|
|
Vec4V linVel11 = V4LoadA(&descs[1].bodyData1->originalLinearVelocity.x);
|
|
Vec4V linVel21 = V4LoadA(&descs[2].bodyData1->originalLinearVelocity.x);
|
|
Vec4V linVel31 = V4LoadA(&descs[3].bodyData1->originalLinearVelocity.x);
|
|
|
|
Vec4V angVel00 = V4LoadA(&descs[0].bodyData0->originalAngularVelocity.x);
|
|
Vec4V angVel10 = V4LoadA(&descs[1].bodyData0->originalAngularVelocity.x);
|
|
Vec4V angVel20 = V4LoadA(&descs[2].bodyData0->originalAngularVelocity.x);
|
|
Vec4V angVel30 = V4LoadA(&descs[3].bodyData0->originalAngularVelocity.x);
|
|
|
|
Vec4V angVel01 = V4LoadA(&descs[0].bodyData1->originalAngularVelocity.x);
|
|
Vec4V angVel11 = V4LoadA(&descs[1].bodyData1->originalAngularVelocity.x);
|
|
Vec4V angVel21 = V4LoadA(&descs[2].bodyData1->originalAngularVelocity.x);
|
|
Vec4V angVel31 = V4LoadA(&descs[3].bodyData1->originalAngularVelocity.x);
|
|
|
|
Vec4V linVelT00, linVelT10, linVelT20;
|
|
Vec4V linVelT01, linVelT11, linVelT21;
|
|
Vec4V angVelT00, angVelT10, angVelT20;
|
|
Vec4V angVelT01, angVelT11, angVelT21;
|
|
|
|
PX_TRANSPOSE_44_34(linVel00, linVel10, linVel20, linVel30, linVelT00, linVelT10, linVelT20);
|
|
PX_TRANSPOSE_44_34(linVel01, linVel11, linVel21, linVel31, linVelT01, linVelT11, linVelT21);
|
|
PX_TRANSPOSE_44_34(angVel00, angVel10, angVel20, angVel30, angVelT00, angVelT10, angVelT20);
|
|
PX_TRANSPOSE_44_34(angVel01, angVel11, angVel21, angVel31, angVelT01, angVelT11, angVelT21);
|
|
|
|
const Vec4V vrelX = V4Sub(linVelT00, linVelT01);
|
|
const Vec4V vrelY = V4Sub(linVelT10, linVelT11);
|
|
const Vec4V vrelZ = V4Sub(linVelT20, linVelT21);
|
|
|
|
//Load up masses and invInertia
|
|
|
|
const Vec4V invMass0 = V4LoadXYZW(descs[0].bodyData0->invMass, descs[1].bodyData0->invMass, descs[2].bodyData0->invMass, descs[3].bodyData0->invMass);
|
|
const Vec4V invMass1 = V4LoadXYZW(descs[0].bodyData1->invMass, descs[1].bodyData1->invMass, descs[2].bodyData1->invMass, descs[3].bodyData1->invMass);
|
|
|
|
const Vec4V invMass0D0 = V4Mul(dom0, invMass0);
|
|
const Vec4V invMass1D1 = V4Mul(dom1, invMass1);
|
|
|
|
Vec4V invInertia00X = V4LoadU(&descs[0].body0TxI->sqrtInvInertia.column0.x); // PT: safe because 'column1' follows 'column0' in PxMat33
|
|
Vec4V invInertia00Y = V4LoadU(&descs[0].body0TxI->sqrtInvInertia.column1.x); // PT: safe because 'column2' follows 'column1' in PxMat33
|
|
Vec4V invInertia00Z = Vec4V_From_Vec3V(V3LoadU(descs[0].body0TxI->sqrtInvInertia.column2));
|
|
|
|
Vec4V invInertia10X = V4LoadU(&descs[1].body0TxI->sqrtInvInertia.column0.x); // PT: safe because 'column1' follows 'column0' in PxMat33
|
|
Vec4V invInertia10Y = V4LoadU(&descs[1].body0TxI->sqrtInvInertia.column1.x); // PT: safe because 'column2' follows 'column1' in PxMat33
|
|
Vec4V invInertia10Z = Vec4V_From_Vec3V(V3LoadU(descs[1].body0TxI->sqrtInvInertia.column2));
|
|
|
|
Vec4V invInertia20X = V4LoadU(&descs[2].body0TxI->sqrtInvInertia.column0.x); // PT: safe because 'column1' follows 'column0' in PxMat33
|
|
Vec4V invInertia20Y = V4LoadU(&descs[2].body0TxI->sqrtInvInertia.column1.x); // PT: safe because 'column2' follows 'column1' in PxMat33
|
|
Vec4V invInertia20Z = Vec4V_From_Vec3V(V3LoadU(descs[2].body0TxI->sqrtInvInertia.column2));
|
|
|
|
Vec4V invInertia30X = V4LoadU(&descs[3].body0TxI->sqrtInvInertia.column0.x); // PT: safe because 'column1' follows 'column0' in PxMat33
|
|
Vec4V invInertia30Y = V4LoadU(&descs[3].body0TxI->sqrtInvInertia.column1.x); // PT: safe because 'column2' follows 'column1' in PxMat33
|
|
Vec4V invInertia30Z = Vec4V_From_Vec3V(V3LoadU(descs[3].body0TxI->sqrtInvInertia.column2));
|
|
|
|
Vec4V invInertia01X = V4LoadU(&descs[0].body1TxI->sqrtInvInertia.column0.x); // PT: safe because 'column1' follows 'column0' in PxMat33
|
|
Vec4V invInertia01Y = V4LoadU(&descs[0].body1TxI->sqrtInvInertia.column1.x); // PT: safe because 'column2' follows 'column1' in PxMat33
|
|
Vec4V invInertia01Z = Vec4V_From_Vec3V(V3LoadU(descs[0].body1TxI->sqrtInvInertia.column2));
|
|
|
|
Vec4V invInertia11X = V4LoadU(&descs[1].body1TxI->sqrtInvInertia.column0.x); // PT: safe because 'column1' follows 'column0' in PxMat33
|
|
Vec4V invInertia11Y = V4LoadU(&descs[1].body1TxI->sqrtInvInertia.column1.x); // PT: safe because 'column2' follows 'column1' in PxMat33
|
|
Vec4V invInertia11Z = Vec4V_From_Vec3V(V3LoadU(descs[1].body1TxI->sqrtInvInertia.column2));
|
|
|
|
Vec4V invInertia21X = V4LoadU(&descs[2].body1TxI->sqrtInvInertia.column0.x); // PT: safe because 'column1' follows 'column0' in PxMat33
|
|
Vec4V invInertia21Y = V4LoadU(&descs[2].body1TxI->sqrtInvInertia.column1.x); // PT: safe because 'column2' follows 'column1' in PxMat33
|
|
Vec4V invInertia21Z = Vec4V_From_Vec3V(V3LoadU(descs[2].body1TxI->sqrtInvInertia.column2));
|
|
|
|
Vec4V invInertia31X = V4LoadU(&descs[3].body1TxI->sqrtInvInertia.column0.x); // PT: safe because 'column1' follows 'column0' in PxMat33
|
|
Vec4V invInertia31Y = V4LoadU(&descs[3].body1TxI->sqrtInvInertia.column1.x); // PT: safe because 'column2' follows 'column1' in PxMat33
|
|
Vec4V invInertia31Z = Vec4V_From_Vec3V(V3LoadU(descs[3].body1TxI->sqrtInvInertia.column2));
|
|
|
|
Vec4V invInertia0X0, invInertia0X1, invInertia0X2;
|
|
Vec4V invInertia0Y0, invInertia0Y1, invInertia0Y2;
|
|
Vec4V invInertia0Z0, invInertia0Z1, invInertia0Z2;
|
|
|
|
Vec4V invInertia1X0, invInertia1X1, invInertia1X2;
|
|
Vec4V invInertia1Y0, invInertia1Y1, invInertia1Y2;
|
|
Vec4V invInertia1Z0, invInertia1Z1, invInertia1Z2;
|
|
|
|
PX_TRANSPOSE_44_34(invInertia00X, invInertia10X, invInertia20X, invInertia30X, invInertia0X0, invInertia0Y0, invInertia0Z0);
|
|
PX_TRANSPOSE_44_34(invInertia00Y, invInertia10Y, invInertia20Y, invInertia30Y, invInertia0X1, invInertia0Y1, invInertia0Z1);
|
|
PX_TRANSPOSE_44_34(invInertia00Z, invInertia10Z, invInertia20Z, invInertia30Z, invInertia0X2, invInertia0Y2, invInertia0Z2);
|
|
|
|
PX_TRANSPOSE_44_34(invInertia01X, invInertia11X, invInertia21X, invInertia31X, invInertia1X0, invInertia1Y0, invInertia1Z0);
|
|
PX_TRANSPOSE_44_34(invInertia01Y, invInertia11Y, invInertia21Y, invInertia31Y, invInertia1X1, invInertia1Y1, invInertia1Z1);
|
|
PX_TRANSPOSE_44_34(invInertia01Z, invInertia11Z, invInertia21Z, invInertia31Z, invInertia1X2, invInertia1Y2, invInertia1Z2);
|
|
|
|
const FloatV invDt = FLoad(invDtF32);
|
|
const PxReal scale = PxMin(0.8f, biasCoefficient);
|
|
|
|
const FloatV p8 = FLoad(scale);
|
|
const FloatV frictionBiasScale = FMul(invDt, p8);
|
|
const Vec4V totalDt = V4Load(totalDtF32);
|
|
const FloatV invTotalDt = FLoad(invTotalDtF32);
|
|
|
|
const Vec4V p84 = V4Splat(p8);
|
|
const Vec4V bounceThreshold = V4Splat(FLoad(bounceThresholdF32));
|
|
|
|
const Vec4V invDtp8 = V4Splat(FMul(invDt, p8));
|
|
|
|
const FloatV dt = FLoad(dtF32);
|
|
|
|
Vec4V bodyFrame00p4 = V4LoadU(&descs[0].bodyFrame0.p.x); // PT: safe because of compile-time-assert in PxTGSSolverConstraintPrepDescBase
|
|
Vec4V bodyFrame01p4 = V4LoadU(&descs[1].bodyFrame0.p.x); // PT: safe because of compile-time-assert in PxTGSSolverConstraintPrepDescBase
|
|
Vec4V bodyFrame02p4 = V4LoadU(&descs[2].bodyFrame0.p.x); // PT: safe because of compile-time-assert in PxTGSSolverConstraintPrepDescBase
|
|
Vec4V bodyFrame03p4 = V4LoadU(&descs[3].bodyFrame0.p.x); // PT: safe because of compile-time-assert in PxTGSSolverConstraintPrepDescBase
|
|
|
|
Vec4V bodyFrame0pX, bodyFrame0pY, bodyFrame0pZ;
|
|
PX_TRANSPOSE_44_34(bodyFrame00p4, bodyFrame01p4, bodyFrame02p4, bodyFrame03p4, bodyFrame0pX, bodyFrame0pY, bodyFrame0pZ);
|
|
|
|
Vec4V bodyFrame10p4 = V4LoadU(&descs[0].bodyFrame1.p.x); // PT: safe because of compile-time-assert in PxTGSSolverConstraintPrepDescBase
|
|
Vec4V bodyFrame11p4 = V4LoadU(&descs[1].bodyFrame1.p.x); // PT: safe because of compile-time-assert in PxTGSSolverConstraintPrepDescBase
|
|
Vec4V bodyFrame12p4 = V4LoadU(&descs[2].bodyFrame1.p.x); // PT: safe because of compile-time-assert in PxTGSSolverConstraintPrepDescBase
|
|
Vec4V bodyFrame13p4 = V4LoadU(&descs[3].bodyFrame1.p.x); // PT: safe because of compile-time-assert in PxTGSSolverConstraintPrepDescBase
|
|
|
|
Vec4V bodyFrame1pX, bodyFrame1pY, bodyFrame1pZ;
|
|
PX_TRANSPOSE_44_34(bodyFrame10p4, bodyFrame11p4, bodyFrame12p4, bodyFrame13p4, bodyFrame1pX, bodyFrame1pY, bodyFrame1pZ);
|
|
|
|
const QuatV bodyFrame00q = QuatVLoadU(&descs[0].bodyFrame0.q.x);
|
|
const QuatV bodyFrame01q = QuatVLoadU(&descs[1].bodyFrame0.q.x);
|
|
const QuatV bodyFrame02q = QuatVLoadU(&descs[2].bodyFrame0.q.x);
|
|
const QuatV bodyFrame03q = QuatVLoadU(&descs[3].bodyFrame0.q.x);
|
|
|
|
const QuatV bodyFrame10q = QuatVLoadU(&descs[0].bodyFrame1.q.x);
|
|
const QuatV bodyFrame11q = QuatVLoadU(&descs[1].bodyFrame1.q.x);
|
|
const QuatV bodyFrame12q = QuatVLoadU(&descs[2].bodyFrame1.q.x);
|
|
const QuatV bodyFrame13q = QuatVLoadU(&descs[3].bodyFrame1.q.x);
|
|
|
|
PxU32 frictionPatchWritebackAddrIndex0 = 0;
|
|
PxU32 frictionPatchWritebackAddrIndex1 = 0;
|
|
PxU32 frictionPatchWritebackAddrIndex2 = 0;
|
|
PxU32 frictionPatchWritebackAddrIndex3 = 0;
|
|
|
|
PxPrefetchLine(c.contactID);
|
|
PxPrefetchLine(c.contactID, 128);
|
|
|
|
PxU32 frictionIndex0 = 0, frictionIndex1 = 0, frictionIndex2 = 0, frictionIndex3 = 0;
|
|
//PxU32 contactIndex0 = 0, contactIndex1 = 0, contactIndex2 = 0, contactIndex3 = 0;
|
|
|
|
//OK, we iterate through all friction patch counts in the constraint patch, building up the constraint list etc.
|
|
|
|
PxU32 maxPatches = PxMax(descs[0].numFrictionPatches, PxMax(descs[1].numFrictionPatches, PxMax(descs[2].numFrictionPatches, descs[3].numFrictionPatches)));
|
|
|
|
const Vec4V p1 = V4Splat(FLoad(0.0001f));
|
|
const Vec4V orthoThreshold = V4Splat(FLoad(0.70710678f));
|
|
|
|
PxU32 contact0 = 0, contact1 = 0, contact2 = 0, contact3 = 0;
|
|
PxU32 patch0 = 0, patch1 = 0, patch2 = 0, patch3 = 0;
|
|
|
|
PxU8 flag = 0;
|
|
if (hasMaxImpulse)
|
|
flag |= SolverContactHeader4::eHAS_MAX_IMPULSE;
|
|
|
|
bool hasFinished[4];
|
|
|
|
for (PxU32 i = 0; i<maxPatches; i++)
|
|
{
|
|
hasFinished[0] = i >= descs[0].numFrictionPatches;
|
|
hasFinished[1] = i >= descs[1].numFrictionPatches;
|
|
hasFinished[2] = i >= descs[2].numFrictionPatches;
|
|
hasFinished[3] = i >= descs[3].numFrictionPatches;
|
|
|
|
frictionIndex0 = hasFinished[0] ? frictionIndex0 : descs[0].startFrictionPatchIndex + i;
|
|
frictionIndex1 = hasFinished[1] ? frictionIndex1 : descs[1].startFrictionPatchIndex + i;
|
|
frictionIndex2 = hasFinished[2] ? frictionIndex2 : descs[2].startFrictionPatchIndex + i;
|
|
frictionIndex3 = hasFinished[3] ? frictionIndex3 : descs[3].startFrictionPatchIndex + i;
|
|
|
|
PxU32 clampedContacts0 = hasFinished[0] ? 0 : c.frictionPatchContactCounts[frictionIndex0];
|
|
PxU32 clampedContacts1 = hasFinished[1] ? 0 : c.frictionPatchContactCounts[frictionIndex1];
|
|
PxU32 clampedContacts2 = hasFinished[2] ? 0 : c.frictionPatchContactCounts[frictionIndex2];
|
|
PxU32 clampedContacts3 = hasFinished[3] ? 0 : c.frictionPatchContactCounts[frictionIndex3];
|
|
|
|
PxU32 firstPatch0 = c.correlationListHeads[frictionIndex0];
|
|
PxU32 firstPatch1 = c.correlationListHeads[frictionIndex1];
|
|
PxU32 firstPatch2 = c.correlationListHeads[frictionIndex2];
|
|
PxU32 firstPatch3 = c.correlationListHeads[frictionIndex3];
|
|
|
|
const PxContactPoint* contactBase0 = descs[0].contacts + c.contactPatches[firstPatch0].start;
|
|
const PxContactPoint* contactBase1 = descs[1].contacts + c.contactPatches[firstPatch1].start;
|
|
const PxContactPoint* contactBase2 = descs[2].contacts + c.contactPatches[firstPatch2].start;
|
|
const PxContactPoint* contactBase3 = descs[3].contacts + c.contactPatches[firstPatch3].start;
|
|
|
|
// negative restitution
|
|
const Vec4V restitution = V4Neg(V4LoadXYZW(contactBase0->restitution, contactBase1->restitution, contactBase2->restitution,
|
|
contactBase3->restitution));
|
|
const Vec4V damping = V4LoadXYZW(contactBase0->damping, contactBase1->damping, contactBase2->damping, contactBase3->damping);
|
|
const bool accelSpring_[] = { !!(contactBase0->materialFlags & PxMaterialFlag::eCOMPLIANT_ACCELERATION_SPRING),
|
|
!!(contactBase1->materialFlags & PxMaterialFlag::eCOMPLIANT_ACCELERATION_SPRING),
|
|
!!(contactBase2->materialFlags & PxMaterialFlag::eCOMPLIANT_ACCELERATION_SPRING),
|
|
!!(contactBase3->materialFlags & PxMaterialFlag::eCOMPLIANT_ACCELERATION_SPRING) };
|
|
const BoolV accelSpring = BLoad(accelSpring_);
|
|
|
|
SolverContactHeaderStepBlock* PX_RESTRICT header = reinterpret_cast<SolverContactHeaderStepBlock*>(ptr);
|
|
ptr += sizeof(SolverContactHeaderStepBlock);
|
|
|
|
header->flags[0] = flags[0];
|
|
header->flags[1] = flags[1];
|
|
header->flags[2] = flags[2];
|
|
header->flags[3] = flags[3];
|
|
|
|
header->flag = flag;
|
|
|
|
PxU32 totalContacts = PxMax(clampedContacts0, PxMax(clampedContacts1, PxMax(clampedContacts2, clampedContacts3)));
|
|
|
|
Vec4V* PX_RESTRICT appliedNormalForces = reinterpret_cast<Vec4V*>(ptr);
|
|
ptr += sizeof(Vec4V)*totalContacts;
|
|
|
|
PxMemZero(appliedNormalForces, sizeof(Vec4V) * totalContacts);
|
|
|
|
header->numNormalConstr = PxTo8(totalContacts);
|
|
header->numNormalConstrs[0] = PxTo8(clampedContacts0);
|
|
header->numNormalConstrs[1] = PxTo8(clampedContacts1);
|
|
header->numNormalConstrs[2] = PxTo8(clampedContacts2);
|
|
header->numNormalConstrs[3] = PxTo8(clampedContacts3);
|
|
//header->sqrtInvMassA = sqrtInvMass0;
|
|
//header->sqrtInvMassB = sqrtInvMass1;
|
|
header->invMass0D0 = invMass0D0;
|
|
header->invMass1D1 = invMass1D1;
|
|
header->angDom0 = angDom0;
|
|
header->angDom1 = angDom1;
|
|
header->shapeInteraction[0] = getInteraction(descs[0]); header->shapeInteraction[1] = getInteraction(descs[1]);
|
|
header->shapeInteraction[2] = getInteraction(descs[2]); header->shapeInteraction[3] = getInteraction(descs[3]);
|
|
|
|
Vec4V* maxImpulse = reinterpret_cast<Vec4V*>(ptr + constraintSize * totalContacts);
|
|
|
|
//header->restitution = restitution;
|
|
|
|
Vec4V normal0 = V4LoadA(&contactBase0->normal.x);
|
|
Vec4V normal1 = V4LoadA(&contactBase1->normal.x);
|
|
Vec4V normal2 = V4LoadA(&contactBase2->normal.x);
|
|
Vec4V normal3 = V4LoadA(&contactBase3->normal.x);
|
|
|
|
Vec4V normalX, normalY, normalZ;
|
|
PX_TRANSPOSE_44_34(normal0, normal1, normal2, normal3, normalX, normalY, normalZ);
|
|
|
|
PX_ASSERT(ValidateVec4(normalX));
|
|
PX_ASSERT(ValidateVec4(normalY));
|
|
PX_ASSERT(ValidateVec4(normalZ));
|
|
|
|
header->normalX = normalX;
|
|
header->normalY = normalY;
|
|
header->normalZ = normalZ;
|
|
|
|
header->maxPenBias = maxPenBias;
|
|
|
|
const Vec4V norVel0 = V4MulAdd(normalZ, linVelT20, V4MulAdd(normalY, linVelT10, V4Mul(normalX, linVelT00)));
|
|
const Vec4V norVel1 = V4MulAdd(normalZ, linVelT21, V4MulAdd(normalY, linVelT11, V4Mul(normalX, linVelT01)));
|
|
const Vec4V relNorVel = V4Sub(norVel0, norVel1);
|
|
|
|
//For all correlation heads - need to pull this out I think
|
|
|
|
//OK, we have a counter for all our patches...
|
|
PxU32 finished = (PxU32(hasFinished[0])) |
|
|
((PxU32(hasFinished[1])) << 1) |
|
|
((PxU32(hasFinished[2])) << 2) |
|
|
((PxU32(hasFinished[3])) << 3);
|
|
|
|
CorrelationListIterator iter0(c, firstPatch0);
|
|
CorrelationListIterator iter1(c, firstPatch1);
|
|
CorrelationListIterator iter2(c, firstPatch2);
|
|
CorrelationListIterator iter3(c, firstPatch3);
|
|
|
|
//PxU32 contact0, contact1, contact2, contact3;
|
|
//PxU32 patch0, patch1, patch2, patch3;
|
|
|
|
if (!hasFinished[0])
|
|
iter0.nextContact(patch0, contact0);
|
|
if (!hasFinished[1])
|
|
iter1.nextContact(patch1, contact1);
|
|
if (!hasFinished[2])
|
|
iter2.nextContact(patch2, contact2);
|
|
if (!hasFinished[3])
|
|
iter3.nextContact(patch3, contact3);
|
|
|
|
PxU8* p = ptr;
|
|
|
|
PxU32 contactCount = 0;
|
|
PxU32 newFinished =
|
|
(PxU32(hasFinished[0] || !iter0.hasNextContact())) |
|
|
((PxU32(hasFinished[1] || !iter1.hasNextContact())) << 1) |
|
|
((PxU32(hasFinished[2] || !iter2.hasNextContact())) << 2) |
|
|
((PxU32(hasFinished[3] || !iter3.hasNextContact())) << 3);
|
|
|
|
BoolV bFinished = BLoad(hasFinished);
|
|
|
|
while (finished != 0xf)
|
|
{
|
|
finished = newFinished;
|
|
++contactCount;
|
|
PxPrefetchLine(p, 384);
|
|
PxPrefetchLine(p, 512);
|
|
PxPrefetchLine(p, 640);
|
|
|
|
SolverContactPointStepBlock* PX_RESTRICT solverContact = reinterpret_cast<SolverContactPointStepBlock*>(p);
|
|
p += constraintSize;
|
|
|
|
const PxContactPoint& con0 = descs[0].contacts[c.contactPatches[patch0].start + contact0];
|
|
const PxContactPoint& con1 = descs[1].contacts[c.contactPatches[patch1].start + contact1];
|
|
const PxContactPoint& con2 = descs[2].contacts[c.contactPatches[patch2].start + contact2];
|
|
const PxContactPoint& con3 = descs[3].contacts[c.contactPatches[patch3].start + contact3];
|
|
|
|
//Now we need to splice these 4 contacts into a single structure
|
|
|
|
{
|
|
Vec4V point0 = V4LoadA(&con0.point.x);
|
|
Vec4V point1 = V4LoadA(&con1.point.x);
|
|
Vec4V point2 = V4LoadA(&con2.point.x);
|
|
Vec4V point3 = V4LoadA(&con3.point.x);
|
|
|
|
Vec4V pointX, pointY, pointZ;
|
|
PX_TRANSPOSE_44_34(point0, point1, point2, point3, pointX, pointY, pointZ);
|
|
|
|
Vec4V cTargetVel0 = V4LoadA(&con0.targetVel.x);
|
|
Vec4V cTargetVel1 = V4LoadA(&con1.targetVel.x);
|
|
Vec4V cTargetVel2 = V4LoadA(&con2.targetVel.x);
|
|
Vec4V cTargetVel3 = V4LoadA(&con3.targetVel.x);
|
|
|
|
Vec4V cTargetVelX, cTargetVelY, cTargetVelZ;
|
|
PX_TRANSPOSE_44_34(cTargetVel0, cTargetVel1, cTargetVel2, cTargetVel3, cTargetVelX, cTargetVelY, cTargetVelZ);
|
|
|
|
const Vec4V separation = V4LoadXYZW(con0.separation, con1.separation, con2.separation, con3.separation);
|
|
|
|
const Vec4V cTargetNorVel = V4MulAdd(cTargetVelX, normalX, V4MulAdd(cTargetVelY, normalY, V4Mul(cTargetVelZ, normalZ)));
|
|
|
|
const Vec4V raX = V4Sub(pointX, bodyFrame0pX);
|
|
const Vec4V raY = V4Sub(pointY, bodyFrame0pY);
|
|
const Vec4V raZ = V4Sub(pointZ, bodyFrame0pZ);
|
|
|
|
const Vec4V rbX = V4Sub(pointX, bodyFrame1pX);
|
|
const Vec4V rbY = V4Sub(pointY, bodyFrame1pY);
|
|
const Vec4V rbZ = V4Sub(pointZ, bodyFrame1pZ);
|
|
|
|
/*raX = V4Sel(V4IsGrtr(solverOffsetSlop, V4Abs(raX)), zero, raX);
|
|
raY = V4Sel(V4IsGrtr(solverOffsetSlop, V4Abs(raY)), zero, raY);
|
|
raZ = V4Sel(V4IsGrtr(solverOffsetSlop, V4Abs(raZ)), zero, raZ);
|
|
|
|
rbX = V4Sel(V4IsGrtr(solverOffsetSlop, V4Abs(rbX)), zero, rbX);
|
|
rbY = V4Sel(V4IsGrtr(solverOffsetSlop, V4Abs(rbY)), zero, rbY);
|
|
rbZ = V4Sel(V4IsGrtr(solverOffsetSlop, V4Abs(rbZ)), zero, rbZ);*/
|
|
|
|
PX_ASSERT(ValidateVec4(raX));
|
|
PX_ASSERT(ValidateVec4(raY));
|
|
PX_ASSERT(ValidateVec4(raZ));
|
|
|
|
PX_ASSERT(ValidateVec4(rbX));
|
|
PX_ASSERT(ValidateVec4(rbY));
|
|
PX_ASSERT(ValidateVec4(rbZ));
|
|
|
|
//raXn = cross(ra, normal) which = Vec3V( a.y*b.z-a.z*b.y, a.z*b.x-a.x*b.z, a.x*b.y-a.y*b.x);
|
|
|
|
Vec4V raXnX = V4NegMulSub(raZ, normalY, V4Mul(raY, normalZ));
|
|
Vec4V raXnY = V4NegMulSub(raX, normalZ, V4Mul(raZ, normalX));
|
|
Vec4V raXnZ = V4NegMulSub(raY, normalX, V4Mul(raX, normalY));
|
|
|
|
Vec4V rbXnX = V4NegMulSub(rbZ, normalY, V4Mul(rbY, normalZ));
|
|
Vec4V rbXnY = V4NegMulSub(rbX, normalZ, V4Mul(rbZ, normalX));
|
|
Vec4V rbXnZ = V4NegMulSub(rbY, normalX, V4Mul(rbX, normalY));
|
|
|
|
const Vec4V relAngVel0 = V4MulAdd(raXnZ, angVelT20, V4MulAdd(raXnY, angVelT10, V4Mul(raXnX, angVelT00)));
|
|
const Vec4V relAngVel1 = V4MulAdd(rbXnZ, angVelT21, V4MulAdd(rbXnY, angVelT11, V4Mul(rbXnX, angVelT01)));
|
|
|
|
const Vec4V relAng = V4Sub(relAngVel0, relAngVel1);
|
|
|
|
const Vec4V slop = V4Mul(solverOffsetSlop, V4Max(V4Sel(V4IsEq(relNorVel, zero), V4Splat(FMax()), V4Div(relAng, relNorVel)), V4One()));
|
|
|
|
raXnX = V4Sel(V4IsGrtr(slop, V4Abs(raXnX)), zero, raXnX);
|
|
raXnY = V4Sel(V4IsGrtr(slop, V4Abs(raXnY)), zero, raXnY);
|
|
raXnZ = V4Sel(V4IsGrtr(slop, V4Abs(raXnZ)), zero, raXnZ);
|
|
|
|
Vec4V delAngVel0X = V4Mul(invInertia0X0, raXnX);
|
|
Vec4V delAngVel0Y = V4Mul(invInertia0X1, raXnX);
|
|
Vec4V delAngVel0Z = V4Mul(invInertia0X2, raXnX);
|
|
|
|
delAngVel0X = V4MulAdd(invInertia0Y0, raXnY, delAngVel0X);
|
|
delAngVel0Y = V4MulAdd(invInertia0Y1, raXnY, delAngVel0Y);
|
|
delAngVel0Z = V4MulAdd(invInertia0Y2, raXnY, delAngVel0Z);
|
|
|
|
delAngVel0X = V4MulAdd(invInertia0Z0, raXnZ, delAngVel0X);
|
|
delAngVel0Y = V4MulAdd(invInertia0Z1, raXnZ, delAngVel0Y);
|
|
delAngVel0Z = V4MulAdd(invInertia0Z2, raXnZ, delAngVel0Z);
|
|
|
|
PX_ASSERT(ValidateVec4(delAngVel0X));
|
|
PX_ASSERT(ValidateVec4(delAngVel0Y));
|
|
PX_ASSERT(ValidateVec4(delAngVel0Z));
|
|
|
|
const Vec4V dotDelAngVel0 = V4MulAdd(delAngVel0X, delAngVel0X, V4MulAdd(delAngVel0Y, delAngVel0Y, V4Mul(delAngVel0Z, delAngVel0Z)));
|
|
|
|
Vec4V unitResponse = V4MulAdd(dotDelAngVel0, angDom0, invMass0D0);
|
|
Vec4V vrel0 = V4Add(norVel0, relAngVel0);
|
|
Vec4V vrel1 = V4Add(norVel1, relAngVel1);
|
|
|
|
Vec4V delAngVel1X = zero;
|
|
Vec4V delAngVel1Y = zero;
|
|
Vec4V delAngVel1Z = zero;
|
|
|
|
//The dynamic-only parts - need to if-statement these up. A branch here shouldn't cost us too much
|
|
if (isDynamic)
|
|
{
|
|
rbXnX = V4Sel(V4IsGrtr(slop, V4Abs(rbXnX)), zero, rbXnX);
|
|
rbXnY = V4Sel(V4IsGrtr(slop, V4Abs(rbXnY)), zero, rbXnY);
|
|
rbXnZ = V4Sel(V4IsGrtr(slop, V4Abs(rbXnZ)), zero, rbXnZ);
|
|
|
|
delAngVel1X = V4Mul(invInertia1X0, rbXnX);
|
|
delAngVel1Y = V4Mul(invInertia1X1, rbXnX);
|
|
delAngVel1Z = V4Mul(invInertia1X2, rbXnX);
|
|
|
|
delAngVel1X = V4MulAdd(invInertia1Y0, rbXnY, delAngVel1X);
|
|
delAngVel1Y = V4MulAdd(invInertia1Y1, rbXnY, delAngVel1Y);
|
|
delAngVel1Z = V4MulAdd(invInertia1Y2, rbXnY, delAngVel1Z);
|
|
|
|
delAngVel1X = V4MulAdd(invInertia1Z0, rbXnZ, delAngVel1X);
|
|
delAngVel1Y = V4MulAdd(invInertia1Z1, rbXnZ, delAngVel1Y);
|
|
delAngVel1Z = V4MulAdd(invInertia1Z2, rbXnZ, delAngVel1Z);
|
|
|
|
PX_ASSERT(ValidateVec4(delAngVel1X));
|
|
PX_ASSERT(ValidateVec4(delAngVel1Y));
|
|
PX_ASSERT(ValidateVec4(delAngVel1Z));
|
|
|
|
const Vec4V dotDelAngVel1 = V4MulAdd(delAngVel1X, delAngVel1X, V4MulAdd(delAngVel1Y, delAngVel1Y, V4Mul(delAngVel1Z, delAngVel1Z)));
|
|
|
|
const Vec4V resp1 = V4MulAdd(dotDelAngVel1, angDom1, invMass1D1);
|
|
|
|
unitResponse = V4Add(unitResponse, resp1);
|
|
}
|
|
|
|
Vec4V vrel = V4Sub(vrel0, vrel1);
|
|
|
|
solverContact->rbXnI[0] = delAngVel1X;
|
|
solverContact->rbXnI[1] = delAngVel1Y;
|
|
solverContact->rbXnI[2] = delAngVel1Z;
|
|
|
|
Vec4V penetration = V4Sub(separation, restDistance);
|
|
|
|
const Vec4V penetrationInvDt = V4Scale(penetration, invTotalDt);
|
|
const BoolV isSeparated = V4IsGrtr(penetration, zero);
|
|
|
|
const BoolV isGreater2 = BAnd(BAnd(V4IsGrtr(zero, restitution), V4IsGrtr(bounceThreshold, vrel)),
|
|
V4IsGrtr(V4Neg(vrel), penetrationInvDt));
|
|
|
|
const Vec4V ratio = V4Sel(isGreater2, V4Add(totalDt, V4Div(penetration, vrel)), zero);
|
|
|
|
const Vec4V recipResponse = V4Sel(V4IsGrtr(unitResponse, zero), V4Recip(unitResponse), zero);
|
|
|
|
//Restitution is negated in the block setup code
|
|
// rdt, a, b, x only needed in compliant case
|
|
const BoolV isCompliant = V4IsGrtr(restitution, zero);
|
|
const Vec4V rdt = V4Scale(restitution, dt);
|
|
const BoolV collidingWithVrel = V4IsGrtr(V4Neg(vrel), penetrationInvDt);
|
|
const Vec4V dampingIfEnabled = V4Sel(BAndNot(isSeparated, collidingWithVrel), zero, damping);
|
|
const Vec4V a = V4Scale(V4Add(dampingIfEnabled, rdt), dt);
|
|
const Vec4V massIfAccelElseOne = V4Sel(accelSpring, recipResponse, one);
|
|
const Vec4V oneIfAccelElseR = V4Sel(accelSpring, one, unitResponse);
|
|
const Vec4V x = V4Recip(V4MulAdd(a, oneIfAccelElseR, one));
|
|
const Vec4V velMultiplier = V4Sel(isCompliant, V4Mul(V4Mul(x, a), massIfAccelElseOne), recipResponse);
|
|
|
|
// biasCoeff includes the unit response s.t. velDeltaFromPosError = separation*biasCoeff
|
|
const Vec4V scaledBias = V4Neg(V4Sel(isCompliant, V4Mul(rdt, V4Mul(x, oneIfAccelElseR)), V4Sel(isSeparated, V4Splat(invDt), invDtp8)));
|
|
|
|
Vec4V targetVelocity = V4NegMulSub(vrel0, kinematicScale0, V4MulAdd(vrel1, kinematicScale1, V4Sel(isGreater2, V4Mul(vrel, restitution), zero)));
|
|
|
|
penetration = V4MulAdd(targetVelocity, ratio, penetration);
|
|
|
|
//Vec4V biasedErr = V4Sel(isGreater2, targetVelocity, scaledBias);
|
|
//Vec4V biasedErr = V4Add(targetVelocity, scaledBias);
|
|
|
|
//biasedErr = V4NegMulSub(V4Sub(vrel, cTargetNorVel), velMultiplier, biasedErr);
|
|
|
|
//These values are present for static and dynamic contacts
|
|
solverContact->raXnI[0] = delAngVel0X;
|
|
solverContact->raXnI[1] = delAngVel0Y;
|
|
solverContact->raXnI[2] = delAngVel0Z;
|
|
solverContact->velMultiplier = V4Sel(bFinished, zero, velMultiplier);
|
|
solverContact->targetVelocity = V4Add(cTargetNorVel, targetVelocity);
|
|
solverContact->separation = penetration;
|
|
solverContact->biasCoefficient = V4Sel(bFinished, zero, scaledBias);
|
|
solverContact->recipResponse = V4Sel(bFinished, zero, recipResponse);
|
|
|
|
if (hasMaxImpulse)
|
|
{
|
|
maxImpulse[contactCount - 1] = V4Merge(FLoad(con0.maxImpulse), FLoad(con1.maxImpulse), FLoad(con2.maxImpulse),
|
|
FLoad(con3.maxImpulse));
|
|
}
|
|
}
|
|
if (!(finished & 0x1))
|
|
{
|
|
iter0.nextContact(patch0, contact0);
|
|
newFinished |= PxU32(!iter0.hasNextContact());
|
|
}
|
|
else
|
|
bFinished = BSetX(bFinished, bTrue);
|
|
|
|
if (!(finished & 0x2))
|
|
{
|
|
iter1.nextContact(patch1, contact1);
|
|
newFinished |= (PxU32(!iter1.hasNextContact()) << 1);
|
|
}
|
|
else
|
|
bFinished = BSetY(bFinished, bTrue);
|
|
|
|
if (!(finished & 0x4))
|
|
{
|
|
iter2.nextContact(patch2, contact2);
|
|
newFinished |= (PxU32(!iter2.hasNextContact()) << 2);
|
|
}
|
|
else
|
|
bFinished = BSetZ(bFinished, bTrue);
|
|
|
|
if (!(finished & 0x8))
|
|
{
|
|
iter3.nextContact(patch3, contact3);
|
|
newFinished |= (PxU32(!iter3.hasNextContact()) << 3);
|
|
}
|
|
else
|
|
bFinished = BSetW(bFinished, bTrue);
|
|
}
|
|
ptr = p;
|
|
if (hasMaxImpulse)
|
|
{
|
|
ptr += sizeof(Vec4V) * totalContacts;
|
|
}
|
|
|
|
//OK...friction time :-)
|
|
|
|
Vec4V maxImpulseScale = V4One();
|
|
{
|
|
const FrictionPatch& frictionPatch0 = c.frictionPatches[frictionIndex0];
|
|
const FrictionPatch& frictionPatch1 = c.frictionPatches[frictionIndex1];
|
|
const FrictionPatch& frictionPatch2 = c.frictionPatches[frictionIndex2];
|
|
const FrictionPatch& frictionPatch3 = c.frictionPatches[frictionIndex3];
|
|
|
|
PxU32 anchorCount0 = frictionPatch0.anchorCount;
|
|
PxU32 anchorCount1 = frictionPatch1.anchorCount;
|
|
PxU32 anchorCount2 = frictionPatch2.anchorCount;
|
|
PxU32 anchorCount3 = frictionPatch3.anchorCount;
|
|
|
|
PxU32 clampedAnchorCount0 = hasFinished[0] || (contactBase0->materialFlags & PxMaterialFlag::eDISABLE_FRICTION) ? 0 : anchorCount0;
|
|
PxU32 clampedAnchorCount1 = hasFinished[1] || (contactBase1->materialFlags & PxMaterialFlag::eDISABLE_FRICTION) ? 0 : anchorCount1;
|
|
PxU32 clampedAnchorCount2 = hasFinished[2] || (contactBase2->materialFlags & PxMaterialFlag::eDISABLE_FRICTION) ? 0 : anchorCount2;
|
|
PxU32 clampedAnchorCount3 = hasFinished[3] || (contactBase3->materialFlags & PxMaterialFlag::eDISABLE_FRICTION) ? 0 : anchorCount3;
|
|
|
|
const PxU32 maxAnchorCount = PxMax(clampedAnchorCount0, PxMax(clampedAnchorCount1, PxMax(clampedAnchorCount2, clampedAnchorCount3)));
|
|
|
|
PX_ALIGN(16, PxReal staticFriction[4]);
|
|
PX_ALIGN(16, PxReal dynamicFriction[4]);
|
|
|
|
//for (PxU32 f = 0; f < 4; ++f)
|
|
{
|
|
PxReal coeff0 = (clampedAnchorCount0 == 2) ? 0.5f : 1.f;
|
|
PxReal coeff1 = (clampedAnchorCount1 == 2) ? 0.5f : 1.f;
|
|
PxReal coeff2 = (clampedAnchorCount2 == 2) ? 0.5f : 1.f;
|
|
PxReal coeff3 = (clampedAnchorCount3 == 2) ? 0.5f : 1.f;
|
|
|
|
staticFriction[0] = contactBase0->staticFriction * coeff0;
|
|
dynamicFriction[0] = contactBase0->dynamicFriction * coeff0;
|
|
staticFriction[1] = contactBase1->staticFriction * coeff1;
|
|
dynamicFriction[1] = contactBase1->dynamicFriction * coeff1;
|
|
staticFriction[2] = contactBase2->staticFriction * coeff2;
|
|
dynamicFriction[2] = contactBase2->dynamicFriction * coeff2;
|
|
staticFriction[3] = contactBase3->staticFriction * coeff3;
|
|
dynamicFriction[3] = contactBase3->dynamicFriction * coeff3;
|
|
}
|
|
|
|
PX_ASSERT(totalContacts == contactCount);
|
|
|
|
header->numFrictionConstr = PxTo8(maxAnchorCount * 2);
|
|
header->numFrictionConstrs[0] = PxTo8(clampedAnchorCount0 * 2);
|
|
header->numFrictionConstrs[1] = PxTo8(clampedAnchorCount1 * 2);
|
|
header->numFrictionConstrs[2] = PxTo8(clampedAnchorCount2 * 2);
|
|
header->numFrictionConstrs[3] = PxTo8(clampedAnchorCount3 * 2);
|
|
|
|
//KS - TODO - extend this if needed
|
|
header->type = PxTo8(DY_SC_TYPE_BLOCK_RB_CONTACT);
|
|
|
|
if (maxAnchorCount)
|
|
{
|
|
const BoolV cond = V4IsGrtr(orthoThreshold, V4Abs(normalX));
|
|
|
|
const Vec4V t0FallbackX = V4Sel(cond, zero, V4Neg(normalY));
|
|
const Vec4V t0FallbackY = V4Sel(cond, V4Neg(normalZ), normalX);
|
|
const Vec4V t0FallbackZ = V4Sel(cond, normalY, zero);
|
|
|
|
//const Vec4V dotNormalVrel = V4MulAdd(normalZ, vrelZ, V4MulAdd(normalY, vrelY, V4Mul(normalX, vrelX)));
|
|
const Vec4V vrelSubNorVelX = V4NegMulSub(normalX, relNorVel, vrelX);
|
|
const Vec4V vrelSubNorVelY = V4NegMulSub(normalY, relNorVel, vrelY);
|
|
const Vec4V vrelSubNorVelZ = V4NegMulSub(normalZ, relNorVel, vrelZ);
|
|
|
|
const Vec4V lenSqvrelSubNorVelZ = V4MulAdd(vrelSubNorVelX, vrelSubNorVelX, V4MulAdd(vrelSubNorVelY, vrelSubNorVelY, V4Mul(vrelSubNorVelZ, vrelSubNorVelZ)));
|
|
|
|
const BoolV bcon2 = V4IsGrtr(lenSqvrelSubNorVelZ, p1);
|
|
|
|
Vec4V t0X = V4Sel(bcon2, vrelSubNorVelX, t0FallbackX);
|
|
Vec4V t0Y = V4Sel(bcon2, vrelSubNorVelY, t0FallbackY);
|
|
Vec4V t0Z = V4Sel(bcon2, vrelSubNorVelZ, t0FallbackZ);
|
|
|
|
//Now normalize this...
|
|
const Vec4V recipLen = V4Rsqrt(V4MulAdd(t0Z, t0Z, V4MulAdd(t0Y, t0Y, V4Mul(t0X, t0X))));
|
|
|
|
t0X = V4Mul(t0X, recipLen);
|
|
t0Y = V4Mul(t0Y, recipLen);
|
|
t0Z = V4Mul(t0Z, recipLen);
|
|
|
|
Vec4V t1X = V4NegMulSub(normalZ, t0Y, V4Mul(normalY, t0Z));
|
|
Vec4V t1Y = V4NegMulSub(normalX, t0Z, V4Mul(normalZ, t0X));
|
|
Vec4V t1Z = V4NegMulSub(normalY, t0X, V4Mul(normalX, t0Y));
|
|
|
|
PX_ASSERT((uintptr_t(descs[0].frictionPtr) & 0xF) == 0);
|
|
PX_ASSERT((uintptr_t(descs[1].frictionPtr) & 0xF) == 0);
|
|
PX_ASSERT((uintptr_t(descs[2].frictionPtr) & 0xF) == 0);
|
|
PX_ASSERT((uintptr_t(descs[3].frictionPtr) & 0xF) == 0);
|
|
|
|
PxU8* PX_RESTRICT writeback0 = descs[0].frictionPtr + frictionPatchWritebackAddrIndex0 * sizeof(FrictionPatch);
|
|
PxU8* PX_RESTRICT writeback1 = descs[1].frictionPtr + frictionPatchWritebackAddrIndex1 * sizeof(FrictionPatch);
|
|
PxU8* PX_RESTRICT writeback2 = descs[2].frictionPtr + frictionPatchWritebackAddrIndex2 * sizeof(FrictionPatch);
|
|
PxU8* PX_RESTRICT writeback3 = descs[3].frictionPtr + frictionPatchWritebackAddrIndex3 * sizeof(FrictionPatch);
|
|
|
|
PxU32 index0 = 0, index1 = 0, index2 = 0, index3 = 0;
|
|
|
|
header->broken = bFalse;
|
|
header->frictionBrokenWritebackByte[0] = writeback0;
|
|
header->frictionBrokenWritebackByte[1] = writeback1;
|
|
header->frictionBrokenWritebackByte[2] = writeback2;
|
|
header->frictionBrokenWritebackByte[3] = writeback3;
|
|
|
|
/*header->frictionNormal[0][0] = t0X;
|
|
header->frictionNormal[0][1] = t0Y;
|
|
header->frictionNormal[0][2] = t0Z;
|
|
|
|
header->frictionNormal[1][0] = t1X;
|
|
header->frictionNormal[1][1] = t1Y;
|
|
header->frictionNormal[1][2] = t1Z;*/
|
|
|
|
Vec4V* PX_RESTRICT appliedForces = reinterpret_cast<Vec4V*>(ptr);
|
|
ptr += sizeof(Vec4V)*header->numFrictionConstr;
|
|
|
|
PxMemZero(appliedForces, sizeof(Vec4V) * header->numFrictionConstr);
|
|
|
|
for (PxU32 j = 0; j < maxAnchorCount; j++)
|
|
{
|
|
PxPrefetchLine(ptr, 384);
|
|
PxPrefetchLine(ptr, 512);
|
|
PxPrefetchLine(ptr, 640);
|
|
SolverContactFrictionStepBlock* PX_RESTRICT f0 = reinterpret_cast<SolverContactFrictionStepBlock*>(ptr);
|
|
ptr += frictionSize;
|
|
SolverContactFrictionStepBlock* PX_RESTRICT f1 = reinterpret_cast<SolverContactFrictionStepBlock*>(ptr);
|
|
ptr += frictionSize;
|
|
|
|
index0 = j < clampedAnchorCount0 ? j : index0;
|
|
index1 = j < clampedAnchorCount1 ? j : index1;
|
|
index2 = j < clampedAnchorCount2 ? j : index2;
|
|
index3 = j < clampedAnchorCount3 ? j : index3;
|
|
|
|
if (j >= clampedAnchorCount0)
|
|
maxImpulseScale = V4SetX(maxImpulseScale, fZero);
|
|
if (j >= clampedAnchorCount1)
|
|
maxImpulseScale = V4SetY(maxImpulseScale, fZero);
|
|
if (j >= clampedAnchorCount2)
|
|
maxImpulseScale = V4SetZ(maxImpulseScale, fZero);
|
|
if (j >= clampedAnchorCount3)
|
|
maxImpulseScale = V4SetW(maxImpulseScale, fZero);
|
|
|
|
t0X = V4Mul(maxImpulseScale, t0X);
|
|
t0Y = V4Mul(maxImpulseScale, t0Y);
|
|
t0Z = V4Mul(maxImpulseScale, t0Z);
|
|
|
|
t1X = V4Mul(maxImpulseScale, t1X);
|
|
t1Y = V4Mul(maxImpulseScale, t1Y);
|
|
t1Z = V4Mul(maxImpulseScale, t1Z);
|
|
|
|
const Vec4V body0Anchor0 = V4LoadU(&frictionPatch0.body0Anchors[index0].x);
|
|
const Vec4V body0Anchor1 = V4LoadU(&frictionPatch1.body0Anchors[index1].x);
|
|
const Vec4V body0Anchor2 = V4LoadU(&frictionPatch2.body0Anchors[index2].x);
|
|
const Vec4V body0Anchor3 = V4LoadU(&frictionPatch3.body0Anchors[index3].x);
|
|
|
|
Vec4V ra0 = QuatRotate4V(bodyFrame00q, body0Anchor0);
|
|
Vec4V ra1 = QuatRotate4V(bodyFrame01q, body0Anchor1);
|
|
Vec4V ra2 = QuatRotate4V(bodyFrame02q, body0Anchor2);
|
|
Vec4V ra3 = QuatRotate4V(bodyFrame03q, body0Anchor3);
|
|
|
|
Vec4V raX, raY, raZ;
|
|
PX_TRANSPOSE_44_34(ra0, ra1, ra2, ra3, raX, raY, raZ);
|
|
|
|
/*raX = V4Sel(V4IsGrtr(solverOffsetSlop, V4Abs(raX)), zero, raX);
|
|
raY = V4Sel(V4IsGrtr(solverOffsetSlop, V4Abs(raY)), zero, raY);
|
|
raZ = V4Sel(V4IsGrtr(solverOffsetSlop, V4Abs(raZ)), zero, raZ);*/
|
|
|
|
const Vec4V raWorldX = V4Add(raX, bodyFrame0pX);
|
|
const Vec4V raWorldY = V4Add(raY, bodyFrame0pY);
|
|
const Vec4V raWorldZ = V4Add(raZ, bodyFrame0pZ);
|
|
|
|
const Vec4V body1Anchor0 = V4LoadU(&frictionPatch0.body1Anchors[index0].x);
|
|
const Vec4V body1Anchor1 = V4LoadU(&frictionPatch1.body1Anchors[index1].x);
|
|
const Vec4V body1Anchor2 = V4LoadU(&frictionPatch2.body1Anchors[index2].x);
|
|
const Vec4V body1Anchor3 = V4LoadU(&frictionPatch3.body1Anchors[index3].x);
|
|
|
|
Vec4V rb0 = QuatRotate4V(bodyFrame10q, body1Anchor0);
|
|
Vec4V rb1 = QuatRotate4V(bodyFrame11q, body1Anchor1);
|
|
Vec4V rb2 = QuatRotate4V(bodyFrame12q, body1Anchor2);
|
|
Vec4V rb3 = QuatRotate4V(bodyFrame13q, body1Anchor3);
|
|
|
|
Vec4V rbX, rbY, rbZ;
|
|
PX_TRANSPOSE_44_34(rb0, rb1, rb2, rb3, rbX, rbY, rbZ);
|
|
|
|
/*rbX = V4Sel(V4IsGrtr(solverOffsetSlop, V4Abs(rbX)), zero, rbX);
|
|
rbY = V4Sel(V4IsGrtr(solverOffsetSlop, V4Abs(rbY)), zero, rbY);
|
|
rbZ = V4Sel(V4IsGrtr(solverOffsetSlop, V4Abs(rbZ)), zero, rbZ);*/
|
|
|
|
const Vec4V rbWorldX = V4Add(rbX, bodyFrame1pX);
|
|
const Vec4V rbWorldY = V4Add(rbY, bodyFrame1pY);
|
|
const Vec4V rbWorldZ = V4Add(rbZ, bodyFrame1pZ);
|
|
|
|
Vec4V errorX = V4Sub(raWorldX, rbWorldX);
|
|
Vec4V errorY = V4Sub(raWorldY, rbWorldY);
|
|
Vec4V errorZ = V4Sub(raWorldZ, rbWorldZ);
|
|
|
|
/*errorX = V4Sel(V4IsGrtr(solverOffsetSlop, V4Abs(errorX)), zero, errorX);
|
|
errorY = V4Sel(V4IsGrtr(solverOffsetSlop, V4Abs(errorY)), zero, errorY);
|
|
errorZ = V4Sel(V4IsGrtr(solverOffsetSlop, V4Abs(errorZ)), zero, errorZ);*/
|
|
|
|
PxU32 contactIndex0 = c.contactID[frictionIndex0][index0];
|
|
PxU32 contactIndex1 = c.contactID[frictionIndex1][index1];
|
|
PxU32 contactIndex2 = c.contactID[frictionIndex2][index2];
|
|
PxU32 contactIndex3 = c.contactID[frictionIndex3][index3];
|
|
|
|
//Ensure that the contact indices are valid
|
|
PX_ASSERT(contactIndex0 == 0xffff || contactIndex0 < descs[0].numContacts);
|
|
PX_ASSERT(contactIndex1 == 0xffff || contactIndex1 < descs[1].numContacts);
|
|
PX_ASSERT(contactIndex2 == 0xffff || contactIndex2 < descs[2].numContacts);
|
|
PX_ASSERT(contactIndex3 == 0xffff || contactIndex3 < descs[3].numContacts);
|
|
|
|
Vec4V targetVel0 = V4LoadA(contactIndex0 == 0xFFFF ? &contactBase0->targetVel.x : &descs[0].contacts[contactIndex0].targetVel.x);
|
|
Vec4V targetVel1 = V4LoadA(contactIndex1 == 0xFFFF ? &contactBase0->targetVel.x : &descs[1].contacts[contactIndex1].targetVel.x);
|
|
Vec4V targetVel2 = V4LoadA(contactIndex2 == 0xFFFF ? &contactBase0->targetVel.x : &descs[2].contacts[contactIndex2].targetVel.x);
|
|
Vec4V targetVel3 = V4LoadA(contactIndex3 == 0xFFFF ? &contactBase0->targetVel.x : &descs[3].contacts[contactIndex3].targetVel.x);
|
|
|
|
Vec4V targetVelX, targetVelY, targetVelZ;
|
|
PX_TRANSPOSE_44_34(targetVel0, targetVel1, targetVel2, targetVel3, targetVelX, targetVelY, targetVelZ);
|
|
|
|
{
|
|
Vec4V raXnX = V4NegMulSub(raZ, t0Y, V4Mul(raY, t0Z));
|
|
Vec4V raXnY = V4NegMulSub(raX, t0Z, V4Mul(raZ, t0X));
|
|
Vec4V raXnZ = V4NegMulSub(raY, t0X, V4Mul(raX, t0Y));
|
|
|
|
raXnX = V4Sel(V4IsGrtr(solverOffsetSlop, V4Abs(raXnX)), zero, raXnX);
|
|
raXnY = V4Sel(V4IsGrtr(solverOffsetSlop, V4Abs(raXnY)), zero, raXnY);
|
|
raXnZ = V4Sel(V4IsGrtr(solverOffsetSlop, V4Abs(raXnZ)), zero, raXnZ);
|
|
|
|
Vec4V delAngVel0X = V4Mul(invInertia0X0, raXnX);
|
|
Vec4V delAngVel0Y = V4Mul(invInertia0X1, raXnX);
|
|
Vec4V delAngVel0Z = V4Mul(invInertia0X2, raXnX);
|
|
|
|
delAngVel0X = V4MulAdd(invInertia0Y0, raXnY, delAngVel0X);
|
|
delAngVel0Y = V4MulAdd(invInertia0Y1, raXnY, delAngVel0Y);
|
|
delAngVel0Z = V4MulAdd(invInertia0Y2, raXnY, delAngVel0Z);
|
|
|
|
delAngVel0X = V4MulAdd(invInertia0Z0, raXnZ, delAngVel0X);
|
|
delAngVel0Y = V4MulAdd(invInertia0Z1, raXnZ, delAngVel0Y);
|
|
delAngVel0Z = V4MulAdd(invInertia0Z2, raXnZ, delAngVel0Z);
|
|
|
|
const Vec4V dotDelAngVel0 = V4MulAdd(delAngVel0Z, delAngVel0Z, V4MulAdd(delAngVel0Y, delAngVel0Y, V4Mul(delAngVel0X, delAngVel0X)));
|
|
|
|
Vec4V resp = V4MulAdd(dotDelAngVel0, angDom0, invMass0D0);
|
|
|
|
const Vec4V tVel0 = V4MulAdd(t0Z, linVelT20, V4MulAdd(t0Y, linVelT10, V4Mul(t0X, linVelT00)));
|
|
Vec4V vrel0 = V4MulAdd(raXnZ, angVelT20, V4MulAdd(raXnY, angVelT10, V4MulAdd(raXnX, angVelT00, tVel0)));
|
|
|
|
Vec4V delAngVel1X = zero;
|
|
Vec4V delAngVel1Y = zero;
|
|
Vec4V delAngVel1Z = zero;
|
|
|
|
Vec4V vrel1 = zero;
|
|
|
|
if (isDynamic)
|
|
{
|
|
Vec4V rbXnX = V4NegMulSub(rbZ, t0Y, V4Mul(rbY, t0Z));
|
|
Vec4V rbXnY = V4NegMulSub(rbX, t0Z, V4Mul(rbZ, t0X));
|
|
Vec4V rbXnZ = V4NegMulSub(rbY, t0X, V4Mul(rbX, t0Y));
|
|
|
|
rbXnX = V4Sel(V4IsGrtr(solverOffsetSlop, V4Abs(rbXnX)), zero, rbXnX);
|
|
rbXnY = V4Sel(V4IsGrtr(solverOffsetSlop, V4Abs(rbXnY)), zero, rbXnY);
|
|
rbXnZ = V4Sel(V4IsGrtr(solverOffsetSlop, V4Abs(rbXnZ)), zero, rbXnZ);
|
|
|
|
delAngVel1X = V4Mul(invInertia1X0, rbXnX);
|
|
delAngVel1Y = V4Mul(invInertia1X1, rbXnX);
|
|
delAngVel1Z = V4Mul(invInertia1X2, rbXnX);
|
|
|
|
delAngVel1X = V4MulAdd(invInertia1Y0, rbXnY, delAngVel1X);
|
|
delAngVel1Y = V4MulAdd(invInertia1Y1, rbXnY, delAngVel1Y);
|
|
delAngVel1Z = V4MulAdd(invInertia1Y2, rbXnY, delAngVel1Z);
|
|
|
|
delAngVel1X = V4MulAdd(invInertia1Z0, rbXnZ, delAngVel1X);
|
|
delAngVel1Y = V4MulAdd(invInertia1Z1, rbXnZ, delAngVel1Y);
|
|
delAngVel1Z = V4MulAdd(invInertia1Z2, rbXnZ, delAngVel1Z);
|
|
|
|
const Vec4V dotDelAngVel1 = V4MulAdd(delAngVel1Z, delAngVel1Z, V4MulAdd(delAngVel1Y, delAngVel1Y, V4Mul(delAngVel1X, delAngVel1X)));
|
|
|
|
const Vec4V resp1 = V4MulAdd(dotDelAngVel1, angDom1, invMass1D1);
|
|
|
|
resp = V4Add(resp, resp1);
|
|
|
|
const Vec4V tVel1 = V4MulAdd(t0Z, linVelT21, V4MulAdd(t0Y, linVelT11, V4Mul(t0X, linVelT01)));
|
|
vrel1 = V4MulAdd(rbXnZ, angVelT21, V4MulAdd(rbXnY, angVelT11, V4MulAdd(rbXnX, angVelT01, tVel1)));
|
|
}
|
|
else if (hasKinematic)
|
|
{
|
|
const Vec4V rbXnX = V4NegMulSub(rbZ, t0Y, V4Mul(rbY, t0Z));
|
|
const Vec4V rbXnY = V4NegMulSub(rbX, t0Z, V4Mul(rbZ, t0X));
|
|
const Vec4V rbXnZ = V4NegMulSub(rbY, t0X, V4Mul(rbX, t0Y));
|
|
|
|
const Vec4V tVel1 = V4MulAdd(t0Z, linVelT21, V4MulAdd(t0Y, linVelT11, V4Mul(t0X, linVelT01)));
|
|
vrel1 = V4MulAdd(rbXnZ, angVelT21, V4MulAdd(rbXnY, angVelT11, V4MulAdd(rbXnX, angVelT01, tVel1)));
|
|
}
|
|
|
|
f0->rbXnI[0] = delAngVel1X;
|
|
f0->rbXnI[1] = delAngVel1Y;
|
|
f0->rbXnI[2] = delAngVel1Z;
|
|
|
|
const Vec4V velMultiplier = V4Mul(maxImpulseScale, V4Sel(V4IsGrtr(resp, zero), V4Div(p84, resp), zero));
|
|
|
|
Vec4V error = V4MulAdd(t0Z, errorZ, V4MulAdd(t0Y, errorY, V4Mul(t0X, errorX)));
|
|
|
|
Vec4V targetVel = V4NegMulSub(vrel0, kinematicScale0, V4MulAdd(vrel1, kinematicScale1, V4MulAdd(t0Z, targetVelZ, V4MulAdd(t0Y, targetVelY, V4Mul(t0X, targetVelX)))));
|
|
|
|
f0->normal[0] = t0X;
|
|
f0->normal[1] = t0Y;
|
|
f0->normal[2] = t0Z;
|
|
f0->raXnI[0] = delAngVel0X;
|
|
f0->raXnI[1] = delAngVel0Y;
|
|
f0->raXnI[2] = delAngVel0Z;
|
|
f0->error = error;
|
|
f0->velMultiplier = velMultiplier;
|
|
f0->biasCoefficient = V4Splat(frictionBiasScale);
|
|
f0->targetVel = targetVel;
|
|
}
|
|
|
|
{
|
|
Vec4V raXnX = V4NegMulSub(raZ, t1Y, V4Mul(raY, t1Z));
|
|
Vec4V raXnY = V4NegMulSub(raX, t1Z, V4Mul(raZ, t1X));
|
|
Vec4V raXnZ = V4NegMulSub(raY, t1X, V4Mul(raX, t1Y));
|
|
raXnX = V4Sel(V4IsGrtr(solverOffsetSlop, V4Abs(raXnX)), zero, raXnX);
|
|
raXnY = V4Sel(V4IsGrtr(solverOffsetSlop, V4Abs(raXnY)), zero, raXnY);
|
|
raXnZ = V4Sel(V4IsGrtr(solverOffsetSlop, V4Abs(raXnZ)), zero, raXnZ);
|
|
|
|
Vec4V delAngVel0X = V4Mul(invInertia0X0, raXnX);
|
|
Vec4V delAngVel0Y = V4Mul(invInertia0X1, raXnX);
|
|
Vec4V delAngVel0Z = V4Mul(invInertia0X2, raXnX);
|
|
|
|
delAngVel0X = V4MulAdd(invInertia0Y0, raXnY, delAngVel0X);
|
|
delAngVel0Y = V4MulAdd(invInertia0Y1, raXnY, delAngVel0Y);
|
|
delAngVel0Z = V4MulAdd(invInertia0Y2, raXnY, delAngVel0Z);
|
|
|
|
delAngVel0X = V4MulAdd(invInertia0Z0, raXnZ, delAngVel0X);
|
|
delAngVel0Y = V4MulAdd(invInertia0Z1, raXnZ, delAngVel0Y);
|
|
delAngVel0Z = V4MulAdd(invInertia0Z2, raXnZ, delAngVel0Z);
|
|
|
|
const Vec4V dotDelAngVel0 = V4MulAdd(delAngVel0Z, delAngVel0Z, V4MulAdd(delAngVel0Y, delAngVel0Y, V4Mul(delAngVel0X, delAngVel0X)));
|
|
|
|
Vec4V resp = V4MulAdd(dotDelAngVel0, angDom0, invMass0D0);
|
|
|
|
const Vec4V tVel0 = V4MulAdd(t1Z, linVelT20, V4MulAdd(t1Y, linVelT10, V4Mul(t1X, linVelT00)));
|
|
Vec4V vrel0 = V4MulAdd(raXnZ, angVelT20, V4MulAdd(raXnY, angVelT10, V4MulAdd(raXnX, angVelT00, tVel0)));
|
|
|
|
Vec4V delAngVel1X = zero;
|
|
Vec4V delAngVel1Y = zero;
|
|
Vec4V delAngVel1Z = zero;
|
|
|
|
Vec4V vrel1 = zero;
|
|
|
|
if (isDynamic)
|
|
{
|
|
Vec4V rbXnX = V4NegMulSub(rbZ, t1Y, V4Mul(rbY, t1Z));
|
|
Vec4V rbXnY = V4NegMulSub(rbX, t1Z, V4Mul(rbZ, t1X));
|
|
Vec4V rbXnZ = V4NegMulSub(rbY, t1X, V4Mul(rbX, t1Y));
|
|
rbXnX = V4Sel(V4IsGrtr(solverOffsetSlop, V4Abs(rbXnX)), zero, rbXnX);
|
|
rbXnY = V4Sel(V4IsGrtr(solverOffsetSlop, V4Abs(rbXnY)), zero, rbXnY);
|
|
rbXnZ = V4Sel(V4IsGrtr(solverOffsetSlop, V4Abs(rbXnZ)), zero, rbXnZ);
|
|
|
|
delAngVel1X = V4Mul(invInertia1X0, rbXnX);
|
|
delAngVel1Y = V4Mul(invInertia1X1, rbXnX);
|
|
delAngVel1Z = V4Mul(invInertia1X2, rbXnX);
|
|
|
|
delAngVel1X = V4MulAdd(invInertia1Y0, rbXnY, delAngVel1X);
|
|
delAngVel1Y = V4MulAdd(invInertia1Y1, rbXnY, delAngVel1Y);
|
|
delAngVel1Z = V4MulAdd(invInertia1Y2, rbXnY, delAngVel1Z);
|
|
|
|
delAngVel1X = V4MulAdd(invInertia1Z0, rbXnZ, delAngVel1X);
|
|
delAngVel1Y = V4MulAdd(invInertia1Z1, rbXnZ, delAngVel1Y);
|
|
delAngVel1Z = V4MulAdd(invInertia1Z2, rbXnZ, delAngVel1Z);
|
|
|
|
const Vec4V dotDelAngVel1 = V4MulAdd(delAngVel1Z, delAngVel1Z, V4MulAdd(delAngVel1Y, delAngVel1Y, V4Mul(delAngVel1X, delAngVel1X)));
|
|
|
|
const Vec4V resp1 = V4MulAdd(dotDelAngVel1, angDom1, invMass1D1);
|
|
|
|
resp = V4Add(resp, resp1);
|
|
|
|
const Vec4V tVel1 = V4MulAdd(t1Z, linVelT21, V4MulAdd(t1Y, linVelT11, V4Mul(t1X, linVelT01)));
|
|
vrel1 = V4MulAdd(rbXnZ, angVelT21, V4MulAdd(rbXnY, angVelT11, V4MulAdd(rbXnX, angVelT01, tVel1)));
|
|
}
|
|
else if (hasKinematic)
|
|
{
|
|
const Vec4V rbXnX = V4NegMulSub(rbZ, t1Y, V4Mul(rbY, t1Z));
|
|
const Vec4V rbXnY = V4NegMulSub(rbX, t1Z, V4Mul(rbZ, t1X));
|
|
const Vec4V rbXnZ = V4NegMulSub(rbY, t1X, V4Mul(rbX, t1Y));
|
|
|
|
const Vec4V tVel1 = V4MulAdd(t1Z, linVelT21, V4MulAdd(t1Y, linVelT11, V4Mul(t1X, linVelT01)));
|
|
vrel1 = V4MulAdd(rbXnZ, angVelT21, V4MulAdd(rbXnY, angVelT11, V4MulAdd(rbXnX, angVelT01, tVel1)));
|
|
}
|
|
|
|
f1->rbXnI[0] = delAngVel1X;
|
|
f1->rbXnI[1] = delAngVel1Y;
|
|
f1->rbXnI[2] = delAngVel1Z;
|
|
|
|
const Vec4V velMultiplier = V4Mul(maxImpulseScale, V4Sel(V4IsGrtr(resp, zero), V4Div(p84, resp), zero));
|
|
|
|
Vec4V error = V4MulAdd(t1Z, errorZ, V4MulAdd(t1Y, errorY, V4Mul(t1X, errorX)));
|
|
|
|
Vec4V targetVel = V4NegMulSub(vrel0, kinematicScale0, V4MulAdd(vrel1, kinematicScale1, V4MulAdd(t1Z, targetVelZ, V4MulAdd(t1Y, targetVelY, V4Mul(t1X, targetVelX)))));
|
|
|
|
f1->normal[0] = t1X;
|
|
f1->normal[1] = t1Y;
|
|
f1->normal[2] = t1Z;
|
|
f1->raXnI[0] = delAngVel0X;
|
|
f1->raXnI[1] = delAngVel0Y;
|
|
f1->raXnI[2] = delAngVel0Z;
|
|
f1->error = error;
|
|
f1->velMultiplier = velMultiplier;
|
|
f1->targetVel = targetVel;
|
|
f1->biasCoefficient = V4Splat(frictionBiasScale);
|
|
}
|
|
}
|
|
|
|
header->dynamicFriction = V4LoadA(dynamicFriction);
|
|
header->staticFriction = V4LoadA(staticFriction);
|
|
|
|
frictionPatchWritebackAddrIndex0++;
|
|
frictionPatchWritebackAddrIndex1++;
|
|
frictionPatchWritebackAddrIndex2++;
|
|
frictionPatchWritebackAddrIndex3++;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
//Counts the friction patches in [frictionPatchStartIndex, frictionPatchEndIndex) whose correlation
//list head is not LIST_END, and reports that count together with the number of bytes needed to
//store that many FrictionPatch objects, rounded up to a multiple of 16.
//\param[in]	c							Correlation buffer whose correlationListHeads are inspected.
//\param[out]	_frictionPatchByteSize		Rounded-up byte size of _numFrictionPatches FrictionPatch entries.
//\param[out]	_numFrictionPatches			Number of inspected entries whose list head differs from LIST_END.
//\param[in]	frictionPatchStartIndex		First index inspected (inclusive).
//\param[in]	frictionPatchEndIndex		One past the last index inspected.
static PX_FORCE_INLINE void computeBlockStreamFrictionByteSizes(const CorrelationBuffer& c,
	PxU32& _frictionPatchByteSize, PxU32& _numFrictionPatches,
	PxU32 frictionPatchStartIndex, PxU32 frictionPatchEndIndex)
{
	//Tally into a local and write the reference outputs once at the end
	//(original author's note: local vars are used "to remove LHS").
	PxU32 usedPatches = 0;

	PxU32 patch = frictionPatchStartIndex;
	while (patch < frictionPatchEndIndex)
	{
		//Slots whose list head is LIST_END are not counted.
		if (CorrelationBuffer::LIST_END != c.correlationListHeads[patch])
			++usedPatches;
		++patch;
	}

	const PxU32 unalignedBytes = usedPatches * sizeof(FrictionPatch);

	_numFrictionPatches = usedPatches;

	//Round up to the next multiple of 16.
	const PxU32 alignedBytes = ((unalignedBytes + 0x0f) & ~0x0f);
	_frictionPatchByteSize = alignedBytes;
	PX_ASSERT(0 == (alignedBytes & 0x0f));
}
|
|
|
|
static bool reserveFrictionBlockStreams(const CorrelationBuffer& c, PxConstraintAllocator& constraintAllocator, PxU32 frictionPatchStartIndex, PxU32 frictionPatchEndIndex,
|
|
FrictionPatch*& _frictionPatches,
|
|
PxU32& numFrictionPatches)
|
|
{
|
|
//From frictionPatchStream we just need to reserve a single buffer.
|
|
PxU32 frictionPatchByteSize = 0;
|
|
//Compute the sizes of all the buffers.
|
|
|
|
computeBlockStreamFrictionByteSizes(c, frictionPatchByteSize, numFrictionPatches, frictionPatchStartIndex, frictionPatchEndIndex);
|
|
|
|
FrictionPatch* frictionPatches = NULL;
|
|
//If the constraint block reservation didn't fail then reserve the friction buffer too.
|
|
if (frictionPatchByteSize > 0)
|
|
{
|
|
frictionPatches = reinterpret_cast<FrictionPatch*>(constraintAllocator.reserveFrictionData(frictionPatchByteSize));
|
|
if(!checkFrictionDataPtr(frictionPatches))
|
|
frictionPatches = NULL;
|
|
}
|
|
|
|
_frictionPatches = frictionPatches;
|
|
|
|
//Return true if neither of the two block reservations failed.
|
|
return (0 == frictionPatchByteSize || frictionPatches);
|
|
}
|
|
|
|
//The persistent friction patch correlation/allocation will already have happenned as this is per-pair.
|
|
//This function just computes the size of the combined solve data.
|
|
static void computeBlockStreamByteSizes4(PxTGSSolverContactDesc* descs,
|
|
PxU32& _solverConstraintByteSize, PxU32* _axisConstraintCount,
|
|
const CorrelationBuffer& c)
|
|
{
|
|
PX_ASSERT(0 == _solverConstraintByteSize);
|
|
|
|
PxU32 maxPatches = 0;
|
|
PxU32 maxContactCount[CorrelationBuffer::MAX_FRICTION_PATCHES];
|
|
PxU32 maxFrictionCount[CorrelationBuffer::MAX_FRICTION_PATCHES];
|
|
PxMemZero(maxContactCount, sizeof(maxContactCount));
|
|
PxMemZero(maxFrictionCount, sizeof(maxFrictionCount));
|
|
bool hasMaxImpulse = false;
|
|
|
|
for (PxU32 a = 0; a < 4; ++a)
|
|
{
|
|
PxU32 axisConstraintCount = 0;
|
|
hasMaxImpulse = hasMaxImpulse || descs[a].hasMaxImpulse;
|
|
for (PxU32 i = 0; i < descs[a].numFrictionPatches; i++)
|
|
{
|
|
PxU32 ind = i + descs[a].startFrictionPatchIndex;
|
|
|
|
const FrictionPatch& frictionPatch = c.frictionPatches[ind];
|
|
|
|
const bool haveFriction = (frictionPatch.materialFlags & PxMaterialFlag::eDISABLE_FRICTION) == 0
|
|
&& frictionPatch.anchorCount != 0;
|
|
//Solver constraint data.
|
|
if (c.frictionPatchContactCounts[ind] != 0)
|
|
{
|
|
maxContactCount[i] = PxMax(c.frictionPatchContactCounts[ind], maxContactCount[i]);
|
|
axisConstraintCount += c.frictionPatchContactCounts[ind];
|
|
|
|
if (haveFriction)
|
|
{
|
|
const PxU32 fricCount = PxU32(c.frictionPatches[ind].anchorCount) * 2;
|
|
maxFrictionCount[i] = PxMax(fricCount, maxFrictionCount[i]);
|
|
axisConstraintCount += fricCount;
|
|
}
|
|
}
|
|
}
|
|
maxPatches = PxMax(descs[a].numFrictionPatches, maxPatches);
|
|
_axisConstraintCount[a] = axisConstraintCount;
|
|
}
|
|
|
|
PxU32 totalContacts = 0, totalFriction = 0;
|
|
for (PxU32 a = 0; a < maxPatches; ++a)
|
|
{
|
|
totalContacts += maxContactCount[a];
|
|
totalFriction += maxFrictionCount[a];
|
|
}
|
|
|
|
//OK, we have a given number of friction patches, contact points and friction constraints so we can calculate how much memory we need
|
|
|
|
//Body 2 is considered static if it is either *not dynamic* or *kinematic*
|
|
|
|
/*bool hasDynamicBody = false;
|
|
for (PxU32 a = 0; a < 4; ++a)
|
|
{
|
|
hasDynamicBody = hasDynamicBody || ((descs[a].bodyState1 == PxSolverContactDesc::eDYNAMIC_BODY));
|
|
}
|
|
|
|
const bool isStatic = !hasDynamicBody;*/
|
|
|
|
const PxU32 headerSize = sizeof(SolverContactHeaderStepBlock) * maxPatches;
|
|
//PxU32 constraintSize = isStatic ? (sizeof(SolverContactBatchPointBase4) * totalContacts) + (sizeof(SolverContactFrictionBase4) * totalFriction) :
|
|
// (sizeof(SolverContactBatchPointDynamic4) * totalContacts) + (sizeof(SolverContactFrictionDynamic4) * totalFriction);
|
|
|
|
PxU32 constraintSize = (sizeof(SolverContactPointStepBlock) * totalContacts) + (sizeof(SolverContactFrictionStepBlock) * totalFriction);
|
|
|
|
//Space for the appliedForce buffer
|
|
constraintSize += sizeof(Vec4V)*(totalContacts + totalFriction);
|
|
|
|
//If we have max impulse, reserve a buffer for it
|
|
if (hasMaxImpulse)
|
|
constraintSize += sizeof(aos::Vec4V) * totalContacts;
|
|
|
|
_solverConstraintByteSize = ((constraintSize + headerSize + 0x0f) & ~0x0f);
|
|
PX_ASSERT(0 == (_solverConstraintByteSize & 0x0f));
|
|
}
|
|
|
|
//Sizes and reserves the solver constraint stream shared by a batch of four contact descs.
//\param[in]	descs						Array of four descs, forwarded to computeBlockStreamByteSizes4.
//\param[in]	c							Correlation buffer, forwarded to computeBlockStreamByteSizes4.
//\param[out]	solverConstraint			Receives the reserved block on success with a non-zero size. Must be NULL on entry; left untouched otherwise.
//\param[out]	axisConstraintCount			Array of four per-desc row counts filled in by computeBlockStreamByteSizes4.
//\param[out]	solverConstraintByteSize	Computed stream size in bytes. Must be 0 on entry.
//\param[in]	constraintAllocator			Allocator the constraint data is reserved from.
//\return eSUCCESS when nothing was needed or the reservation succeeded, eUNBATCHABLE when the
//padded size exceeds 16384 bytes, eOUT_OF_MEMORY when the reservation failed or was rejected.
static SolverConstraintPrepState::Enum reserveBlockStreams4(PxTGSSolverContactDesc* descs, Dy::CorrelationBuffer& c,
	PxU8*& solverConstraint, PxU32* axisConstraintCount,
	PxU32& solverConstraintByteSize,
	PxConstraintAllocator& constraintAllocator)
{
	PX_ASSERT(NULL == solverConstraint);
	PX_ASSERT(0 == solverConstraintByteSize);

	//Work out the stream size and the per-desc row counts.
	computeBlockStreamByteSizes4(descs, solverConstraintByteSize, axisConstraintCount, c);

	//An empty stream needs no reservation and counts as success.
	const PxU32 streamBytes = solverConstraintByteSize;
	if (streamBytes == 0)
		return SolverConstraintPrepState::eSUCCESS;

	//The request carries 16 bytes on top of the computed size (NOTE(review): presumably padding -
	//the reason is not visible in this function). Requests above 16384 bytes cannot be batched.
	const PxU32 paddedBytes = streamBytes + 16u;
	if (paddedBytes > 16384)
		return SolverConstraintPrepState::eUNBATCHABLE;

	//A pointer rejected by checkConstraintDataPtr is treated exactly like a failed reservation.
	PxU8* block = constraintAllocator.reserveConstraintData(paddedBytes);
	if (!checkConstraintDataPtr<false>(block))
		block = NULL;

	if (!block)
		return SolverConstraintPrepState::eOUT_OF_MEMORY;

	//Hand the reserved block back to the caller.
	solverConstraint = block;
	PX_ASSERT(0 == (uintptr_t(solverConstraint) & 0x0f));

	return SolverConstraintPrepState::eSUCCESS;
}
|
|
|
|
// Builds one batched (4-wide) TGS contact constraint from four contact descriptors whose
// contacts are already available through blockDescs[i].contacts / numContacts.
//
// Per descriptor this creates contact patches, correlates them with friction patches, grows the
// friction patches and drops the empty ones. It then reserves the friction and constraint
// streams, copies the friction patches into their stream, fills in each PxSolverConstraintDesc
// and finally calls setupFinalizeSolverConstraints4Step to write the constraint data.
//
// Returns eUNBATCHABLE when friction patches cannot be fetched, contact patch creation fails or
// patch correlation overflows; eOUT_OF_MEMORY when the friction streams cannot be reserved; any
// non-success state reported by reserveBlockStreams4; eSUCCESS otherwise.
SolverConstraintPrepState::Enum createFinalizeSolverContacts4Step(
	Dy::CorrelationBuffer& c,
	PxTGSSolverContactDesc* blockDescs,
	const PxReal invDtF32,
	const PxReal totalDtF32,
	const PxReal invTotalDtF32,
	const PxReal dt,
	const PxReal bounceThresholdF32,
	const PxReal frictionOffsetThreshold,
	const PxReal correlationDistance,
	const PxReal biasCoefficient,
	PxConstraintAllocator& constraintAllocator)
{
	// Per-lane mass/inertia scales, gathered so they can be loaded as aligned vectors further down.
	PX_ALIGN(16, PxReal invMassScale0[4]);
	PX_ALIGN(16, PxReal invMassScale1[4]);
	PX_ALIGN(16, PxReal invInertiaScale0[4]);
	PX_ALIGN(16, PxReal invInertiaScale1[4]);

	c.frictionPatchCount = 0;
	c.contactPatchCount = 0;

	// Pass 1: build and correlate the contact/friction patches of every lane.
	for (PxU32 lane = 0; lane < 4; ++lane)
	{
		PxTGSSolverContactDesc& blockDesc = blockDescs[lane];

		invMassScale0[lane] = blockDesc.invMassScales.linear0;
		invMassScale1[lane] = blockDesc.invMassScales.linear1;
		invInertiaScale0[lane] = blockDesc.invMassScales.angular0;
		invInertiaScale1[lane] = blockDesc.invMassScales.angular1;

		blockDesc.startFrictionPatchIndex = c.frictionPatchCount;
		if (!(blockDesc.disableStrongFriction))
		{
			if (!getFrictionPatches(c, blockDesc.frictionPtr, blockDesc.frictionCount,
				blockDesc.bodyFrame0, blockDesc.bodyFrame1, correlationDistance))
			{
				return SolverConstraintPrepState::eUNBATCHABLE;
			}
		}

		// Create the contact patches for this lane.
		blockDesc.startContactPatchIndex = c.contactPatchCount;
		if (!createContactPatches(c, blockDesc.contacts, blockDesc.numContacts, PXC_SAME_NORMAL))
			return SolverConstraintPrepState::eUNBATCHABLE;
		blockDesc.numContactPatches = PxU16(c.contactPatchCount - blockDesc.startContactPatchIndex);

		// correlatePatches reports an overflow through its return value.
		if (correlatePatches(c, blockDesc.contacts, blockDesc.bodyFrame0, blockDesc.bodyFrame1, PXC_SAME_NORMAL,
			blockDesc.startContactPatchIndex, blockDesc.startFrictionPatchIndex))
		{
			return SolverConstraintPrepState::eUNBATCHABLE;
		}

		growPatches(c, blockDesc.contacts, blockDesc.bodyFrame0, blockDesc.bodyFrame1, blockDesc.startFrictionPatchIndex,
			frictionOffsetThreshold + blockDesc.restDistance);

		// Remove the empty friction patches of this lane - do we actually need to do this?
		// Walks backwards over the lane's patches and closes each gap by shifting the later
		// list heads and contact counts down by one slot.
		// NOTE(review): 0xffff is presumably the same sentinel as CorrelationBuffer::LIST_END
		// used further down - confirm before unifying.
		for (PxU32 onePastPatch = c.frictionPatchCount; onePastPatch > blockDesc.startFrictionPatchIndex; --onePastPatch)
		{
			if (c.correlationListHeads[onePastPatch - 1] != 0xffff)
				continue;

			for (PxU32 src = onePastPatch; src < c.frictionPatchCount; ++src)
			{
				c.correlationListHeads[src - 1] = c.correlationListHeads[src];
				c.frictionPatchContactCounts[src - 1] = c.frictionPatchContactCounts[src];
			}
			--c.frictionPatchCount;
		}

		const PxU32 laneFrictionPatchCount = c.frictionPatchCount - blockDesc.startFrictionPatchIndex;
		blockDesc.numFrictionPatches = laneFrictionPatchCount;
	}

	// Pass 2: reserve the friction patch stream of every lane.
	FrictionPatch* frictionPatchArray[4];
	PxU32 frictionPatchCounts[4];

	for (PxU32 lane = 0; lane < 4; ++lane)
	{
		PxTGSSolverContactDesc& blockDesc = blockDescs[lane];

		//KS - TODO - how can we recover if we failed to allocate this memory?
		if (!reserveFrictionBlockStreams(c, constraintAllocator, blockDesc.startFrictionPatchIndex,
			blockDesc.numFrictionPatches + blockDesc.startFrictionPatchIndex,
			frictionPatchArray[lane],
			frictionPatchCounts[lane]))
		{
			return SolverConstraintPrepState::eOUT_OF_MEMORY;
		}
	}

	// All friction data has been calculated and correlated at this point, so the batched
	// constraint itself can be reserved and written.
	PxU8* solverConstraint = NULL;
	PxU32 solverConstraintByteSize = 0;
	PxU32 axisConstraintCount[4];

	const SolverConstraintPrepState::Enum reserveState = reserveBlockStreams4(blockDescs, c,
		solverConstraint, axisConstraintCount,
		solverConstraintByteSize,
		constraintAllocator);

	if (reserveState != SolverConstraintPrepState::eSUCCESS)
		return reserveState;

	// Pass 3: publish the reserved streams through each lane's descriptors.
	for (PxU32 lane = 0; lane < 4; ++lane)
	{
		FrictionPatch* dstPatch = frictionPatchArray[lane];

		PxTGSSolverContactDesc& blockDesc = blockDescs[lane];
		PxSolverConstraintDesc& desc = *blockDesc.desc;
		blockDesc.frictionPtr = reinterpret_cast<PxU8*>(dstPatch);
		blockDesc.frictionCount = PxTo8(frictionPatchCounts[lane]);

		// Initialise the friction buffer with every patch that still has a correlation list.
		if (dstPatch)
		{
			dstPatch->prefetch();

			for (PxU32 i = 0; i < blockDesc.numFrictionPatches; i++)
			{
				if (c.correlationListHeads[blockDesc.startFrictionPatchIndex + i] == CorrelationBuffer::LIST_END)
					continue;

				PxMemCopy(dstPatch, &c.frictionPatches[blockDesc.startFrictionPatchIndex + i], sizeof(FrictionPatch));
				++dstPatch;
			}
		}

		blockDesc.axisConstraintCount += PxTo16(axisConstraintCount[lane]);

		// All four lanes share the same constraint block.
		desc.constraint = solverConstraint;
		desc.constraintLengthOver16 = PxTo16(solverConstraintByteSize / 16);
		desc.writeBack = blockDesc.contactForces;
	}

	const Vec4V iMassScale0 = V4LoadA(invMassScale0);
	const Vec4V iInertiaScale0 = V4LoadA(invInertiaScale0);
	const Vec4V iMassScale1 = V4LoadA(invMassScale1);
	const Vec4V iInertiaScale1 = V4LoadA(invInertiaScale1);

	setupFinalizeSolverConstraints4Step(blockDescs, c, solverConstraint, invDtF32, totalDtF32, invTotalDtF32, dt, bounceThresholdF32,
		biasCoefficient, iMassScale0, iInertiaScale0, iMassScale1, iInertiaScale1);

	PX_ASSERT((*solverConstraint == DY_SC_TYPE_BLOCK_RB_CONTACT) || (*solverConstraint == DY_SC_TYPE_BLOCK_STATIC_RB_CONTACT));

	// Zero the 32-bit word that directly follows the constraint data; reserveBlockStreams4
	// reserves 16 bytes beyond solverConstraintByteSize.
	*(reinterpret_cast<PxU32*>(solverConstraint + solverConstraintByteSize)) = 0;

	return SolverConstraintPrepState::eSUCCESS;
}
|
|
|
|
// Entry point for batched (4-wide) TGS contact prep that starts from contact manager outputs.
//
// Extracts the contacts of the four contact managers into the thread's contact buffer, folds
// the per-contact mass/inertia scales into each descriptor and then forwards to the
// CorrelationBuffer overload of createFinalizeSolverContacts4Step.
//
// Returns eUNBATCHABLE when the shared contact buffer would hold more than 64 contacts, when a
// descriptor uses torsional friction, when a contact manager yields no contacts or when a
// target velocity is present; otherwise returns whatever the forwarded call returns.
SolverConstraintPrepState::Enum createFinalizeSolverContacts4Step(
	PxsContactManagerOutput** cmOutputs,
	ThreadContext& threadContext,
	PxTGSSolverContactDesc* blockDescs,
	const PxReal invDtF32,
	const PxReal totalDtF32,
	const PxReal invTotalDtF32,
	const PxReal dtF32,
	const PxReal bounceThresholdF32,
	const PxReal frictionOffsetThreshold,
	const PxReal correlationDistance,
	const PxReal biasCoefficient,
	PxConstraintAllocator& constraintAllocator)
{
	// Start from "no constraint" on every lane so that early-outs leave a consistent state.
	for (PxU32 lane = 0; lane < 4; ++lane)
		blockDescs[lane].desc->constraintLengthOver16 = 0;

	// PT: commented out because TGS does not support compound constraints
	//PX_ASSERT(cmOutputs[0]->nbContacts && cmOutputs[1]->nbContacts && cmOutputs[2]->nbContacts && cmOutputs[3]->nbContacts);

	PxContactBuffer& buffer = threadContext.mContactBuffer;
	buffer.count = 0;

	CorrelationBuffer& c = threadContext.mCorrelationBuffer;

	for (PxU32 lane = 0; lane < 4; ++lane)
	{
		PxTGSSolverContactDesc& blockDesc = blockDescs[lane];
		PxSolverConstraintDesc& desc = *blockDesc.desc;

		// This lane's contacts are appended to what the previous lanes already extracted.
		blockDesc.contacts = buffer.contacts + buffer.count;

		PxPrefetchLine(desc.bodyA);
		PxPrefetchLine(desc.bodyB);

		// Unbatchable if we have (a) too many contacts or (b) torsional friction enabled - it is
		// easier to handle those on an individual contact basis because they are expected to be
		// used relatively rarely.
		const bool tooManyContacts = (buffer.count + cmOutputs[lane]->nbContacts) > 64;
		const bool usesTorsionalFriction = blockDesc.torsionalPatchRadius != 0.f || blockDesc.minTorsionalPatchRadius != 0.f;
		if (tooManyContacts || usesTorsionalFriction)
			return SolverConstraintPrepState::eUNBATCHABLE;

		bool hasMaxImpulse = false;
		bool hasTargetVelocity = false;

		// The friction data is needed by the correlation step of the forwarded call.
		PxPrefetchLine(blockDesc.frictionPtr);
		PxPrefetchLine(blockDesc.frictionPtr, 64);
		PxPrefetchLine(blockDesc.frictionPtr, 128);

		// NOTE(review): this prefetches the current output's streams rather than the next
		// one's - confirm whether cmOutputs[lane + 1] was intended.
		if (lane < 3)
		{
			PxPrefetchLine(cmOutputs[lane]->contactPatches);
			PxPrefetchLine(cmOutputs[lane]->contactPoints);
		}

		PxReal invMassScale0, invMassScale1, invInertiaScale0, invInertiaScale1;

		const PxReal defaultMaxImpulse = PxMin(blockDesc.bodyData0->maxContactImpulse, blockDesc.bodyData1->maxContactImpulse);

		const PxU32 contactCount = extractContacts(buffer, *cmOutputs[lane], hasMaxImpulse, hasTargetVelocity, invMassScale0, invMassScale1,
			invInertiaScale0, invInertiaScale1, defaultMaxImpulse);

		if (contactCount == 0 || hasTargetVelocity)
			return SolverConstraintPrepState::eUNBATCHABLE;

		blockDesc.numContacts = contactCount;
		blockDesc.hasMaxImpulse = hasMaxImpulse;
		// hasTargetVelocity is always false here because of the early-out above.
		blockDesc.disableStrongFriction = blockDesc.disableStrongFriction || hasTargetVelocity;

		// Fold the scales reported by extractContacts into the descriptor; the angular scale
		// is forced to zero for kinematic bodies.
		blockDesc.invMassScales.linear0 *= invMassScale0;
		blockDesc.invMassScales.linear1 *= invMassScale1;
		blockDesc.invMassScales.angular0 *= blockDesc.body0->isKinematic ? 0.f : invInertiaScale0;
		blockDesc.invMassScales.angular1 *= blockDesc.body1->isKinematic ? 0.f : invInertiaScale1;
	}

	return createFinalizeSolverContacts4Step(c, blockDescs,
		invDtF32, totalDtF32, invTotalDtF32, dtF32, bounceThresholdF32, frictionOffsetThreshold,
		correlationDistance, biasCoefficient, constraintAllocator);
}
|
|
|
|
static void setOrthoData(const PxReal& ang0X, const PxReal& ang0Y, const PxReal& ang0Z, const PxReal& ang1X, const PxReal& ang1Y, const PxReal& ang1Z,
|
|
const PxReal& recipResponse, const PxReal& error, PxReal& orthoAng0X, PxReal& orthoAng0Y, PxReal& orthoAng0Z, PxReal& orthoAng1X, PxReal& orthoAng1Y, PxReal& orthoAng1Z,
|
|
PxReal& orthoRecipResponse, PxReal& orthoError, bool disableProcessing, PxU32 solveHint, PxU32& flags, PxU32& orthoCount, bool finished)
|
|
{
|
|
if (!finished && !disableProcessing)
|
|
{
|
|
if (solveHint == PxConstraintSolveHint::eROTATIONAL_EQUALITY)
|
|
{
|
|
flags |= DY_SC_FLAG_ROT_EQ;
|
|
orthoAng0X = ang0X; orthoAng0Y = ang0Y; orthoAng0Z = ang0Z;
|
|
orthoAng1X = ang1X; orthoAng1Y = ang1Y; orthoAng1Z = ang1Z;
|
|
orthoRecipResponse = recipResponse;
|
|
orthoError = error;
|
|
orthoCount++;
|
|
}
|
|
else if (solveHint & PxConstraintSolveHint::eEQUALITY)
|
|
flags |= DY_SC_FLAG_ORTHO_TARGET;
|
|
}
|
|
}
|
|
|
|
// Forward declaration of the setupSolverConstraintStep4 overload that works on constraint rows
// which have already been generated. It is needed because the shader-descriptor overload that
// follows calls it before its definition appears.
SolverConstraintPrepState::Enum setupSolverConstraintStep4(
	PxTGSSolverConstraintPrepDesc* PX_RESTRICT constraintDescs,
	const PxReal dt, const PxReal totalDt, const PxReal recipdt, const PxReal recipTotalDt,
	PxU32& totalRows,
	PxConstraintAllocator& allocator, PxU32 maxRows,
	const PxReal lengthScale, const PxReal biasCoefficient,
	bool isResidualReportingEnabled);
|
|
|
|
// Generates the 1D constraint rows of four joints through their solver-prep shaders and then
// forwards to the row-based setupSolverConstraintStep4 overload to build the batched constraint.
//
// constraintShaderDescs - the four shader descriptors (solverPrep callback + constant block)
// constraintDescs       - the four prep descriptors; rows, numRows, invMassScales and
//                         body0WorldOffset are written here
// totalRows             - [out] reset to 0 here, then passed on to the row-based overload
//
// Returns eUNBATCHABLE when a descriptor has no solverPrep callback, when its constraint is
// disabled or when its shader produces no rows; otherwise returns the result of the row-based
// overload.
SolverConstraintPrepState::Enum setupSolverConstraintStep4(
	SolverConstraintShaderPrepDesc* PX_RESTRICT constraintShaderDescs,
	PxTGSSolverConstraintPrepDesc* PX_RESTRICT constraintDescs,
	const PxReal dt, const PxReal totalDt, const PxReal recipdt, const PxReal recipTotalDt, PxU32& totalRows,
	PxConstraintAllocator& allocator, const PxReal lengthScale, const PxReal biasCoefficient, bool isResidualReportingEnabled)
{
	//KS - we will never get here with constraints involving articulations so we don't need to stress about those in here

	totalRows = 0;

	// One pool of rows shared by all four constraints. nextRow is where the next constraint's
	// rows are written; nextToInit is the first row not yet passed to setupConstraintRows.
	Px1DConstraint rowPool[MAX_CONSTRAINT_ROWS * 4];
	Px1DConstraint* nextRow = rowPool;
	Px1DConstraint* nextToInit = rowPool;

	PxU32 maxRows = 0;

	// The first constraint gets MAX_CONSTRAINT_ROWS prepared rows; afterwards only as many rows
	// as the previous constraint consumed are prepared, which keeps MAX_CONSTRAINT_ROWS prepared
	// rows available from nextRow onwards.
	PxU32 nbToPrep = MAX_CONSTRAINT_ROWS;

	for (PxU32 lane = 0; lane < 4; ++lane)
	{
		SolverConstraintShaderPrepDesc& shaderDesc = constraintShaderDescs[lane];
		PxTGSSolverConstraintPrepDesc& desc = constraintDescs[lane];

		if (!shaderDesc.solverPrep)
			return SolverConstraintPrepState::eUNBATCHABLE;

		PX_ASSERT(nextToInit + nbToPrep <= rowPool + MAX_CONSTRAINT_ROWS*4);
		setupConstraintRows(nextToInit, nbToPrep);
		nextToInit += nbToPrep;

		// Defaults that the shader may override.
		desc.invMassScales.linear0 = 1.0f;
		desc.invMassScales.linear1 = 1.0f;
		desc.invMassScales.angular0 = 1.0f;
		desc.invMassScales.angular1 = 1.0f;
		desc.body0WorldOffset = PxVec3(0.0f);

		//TAG:solverprepcall
		PxU32 constraintCount = 0;
		if (!desc.disableConstraint)
		{
			constraintCount = (*shaderDesc.solverPrep)(nextRow,
				desc.body0WorldOffset,
				MAX_CONSTRAINT_ROWS,
				desc.invMassScales,
				shaderDesc.constantBlock,
				desc.bodyFrame0, desc.bodyFrame1, desc.extendedLimits, desc.cA2w, desc.cB2w);
		}

		// A constraint without rows (including a disabled one) cannot be part of a batch.
		if (constraintCount == 0)
			return SolverConstraintPrepState::eUNBATCHABLE;

		nbToPrep = constraintCount;
		maxRows = PxMax(constraintCount, maxRows);

		desc.rows = nextRow;
		desc.numRows = constraintCount;
		nextRow += constraintCount;

		// The angular scale is forced to zero for kinematic bodies.
		if (desc.body0->isKinematic)
			desc.invMassScales.angular0 = 0.0f;
		if (desc.body1->isKinematic)
			desc.invMassScales.angular1 = 0.0f;
	}

	return setupSolverConstraintStep4(constraintDescs, dt, totalDt, recipdt, recipTotalDt, totalRows, allocator, maxRows, lengthScale, biasCoefficient, isResidualReportingEnabled);
}
|
|
|
|
SolverConstraintPrepState::Enum setupSolverConstraintStep4
|
|
(PxTGSSolverConstraintPrepDesc* PX_RESTRICT constraintDescs,
|
|
const PxReal stepDt, const PxReal simDt, const PxReal recipStepDt, const PxReal recipSimDt, PxU32& totalRows,
|
|
PxConstraintAllocator& allocator, PxU32 maxRows,
|
|
const PxReal lengthScale, const PxReal biasCoefficient, bool isResidualReportingEnabled)
|
|
{
|
|
const Vec4V zero = V4Zero();
|
|
Px1DConstraint* allSorted[MAX_CONSTRAINT_ROWS * 4];
|
|
PxU32 startIndex[4];
|
|
PX_ALIGN(16, PxVec4) angSqrtInvInertia0[MAX_CONSTRAINT_ROWS * 4];
|
|
PX_ALIGN(16, PxVec4) angSqrtInvInertia1[MAX_CONSTRAINT_ROWS * 4];
|
|
|
|
PxU32 numRows = 0;
|
|
|
|
for (PxU32 a = 0; a < 4; ++a)
|
|
{
|
|
startIndex[a] = numRows;
|
|
PxTGSSolverConstraintPrepDesc& desc = constraintDescs[a];
|
|
Px1DConstraint** sorted = allSorted + numRows;
|
|
|
|
for (PxU32 i = 0; i < desc.numRows; ++i)
|
|
{
|
|
if (desc.rows[i].flags & Px1DConstraintFlag::eANGULAR_CONSTRAINT)
|
|
{
|
|
if (desc.rows[i].solveHint == PxConstraintSolveHint::eEQUALITY)
|
|
desc.rows[i].solveHint = PxConstraintSolveHint::eROTATIONAL_EQUALITY;
|
|
else if (desc.rows[i].solveHint == PxConstraintSolveHint::eINEQUALITY)
|
|
desc.rows[i].solveHint = PxConstraintSolveHint::eROTATIONAL_INEQUALITY;
|
|
}
|
|
}
|
|
|
|
preprocessRows(sorted, desc.rows, angSqrtInvInertia0 + numRows, angSqrtInvInertia1 + numRows, desc.numRows,
|
|
desc.body0TxI->sqrtInvInertia, desc.body1TxI->sqrtInvInertia, desc.bodyData0->invMass, desc.bodyData1->invMass,
|
|
desc.invMassScales, desc.disablePreprocessing, desc.improvedSlerp);
|
|
|
|
numRows += desc.numRows;
|
|
}
|
|
|
|
const PxU32 stride = isResidualReportingEnabled ? sizeof(SolverConstraint1DStep4WithResidual) : sizeof(SolverConstraint1DStep4);
|
|
|
|
const PxU32 constraintLength = sizeof(SolverConstraint1DHeaderStep4) + stride * maxRows;
|
|
|
|
//KS - +16 is for the constraint progress counter, which needs to be the last element in the constraint (so that we
|
|
//know SPU DMAs have completed)
|
|
PxU8* ptr = allocator.reserveConstraintData(constraintLength + 16u);
|
|
if(!checkConstraintDataPtr<true>(ptr))
|
|
{
|
|
for (PxU32 a = 0; a < 4; ++a)
|
|
{
|
|
PxTGSSolverConstraintPrepDesc& desc = constraintDescs[a];
|
|
desc.desc->constraint = NULL;
|
|
desc.desc->constraintLengthOver16 = 0;
|
|
desc.desc->writeBack = desc.writeback;
|
|
}
|
|
return SolverConstraintPrepState::eOUT_OF_MEMORY;
|
|
}
|
|
//desc.constraint = ptr;
|
|
|
|
totalRows = numRows;
|
|
|
|
const PxReal erp = 0.5f * biasCoefficient;
|
|
|
|
const bool isKinematic00 = constraintDescs[0].body0->isKinematic;
|
|
const bool isKinematic01 = constraintDescs[0].body1->isKinematic;
|
|
const bool isKinematic10 = constraintDescs[1].body0->isKinematic;
|
|
const bool isKinematic11 = constraintDescs[1].body1->isKinematic;
|
|
const bool isKinematic20 = constraintDescs[2].body0->isKinematic;
|
|
const bool isKinematic21 = constraintDescs[2].body1->isKinematic;
|
|
const bool isKinematic30 = constraintDescs[3].body0->isKinematic;
|
|
const bool isKinematic31 = constraintDescs[3].body1->isKinematic;
|
|
|
|
for (PxU32 a = 0; a < 4; ++a)
|
|
{
|
|
PxTGSSolverConstraintPrepDesc& desc = constraintDescs[a];
|
|
desc.desc->constraint = ptr;
|
|
desc.desc->constraintLengthOver16 = PxU16(constraintLength/16);
|
|
desc.desc->writeBack = desc.writeback;
|
|
}
|
|
|
|
{
|
|
PxU8* currPtr = ptr;
|
|
SolverConstraint1DHeaderStep4* header = reinterpret_cast<SolverConstraint1DHeaderStep4*>(currPtr);
|
|
currPtr += sizeof(SolverConstraint1DHeaderStep4);
|
|
|
|
const PxTGSSolverBodyData& bd00 = *constraintDescs[0].bodyData0;
|
|
const PxTGSSolverBodyData& bd01 = *constraintDescs[1].bodyData0;
|
|
const PxTGSSolverBodyData& bd02 = *constraintDescs[2].bodyData0;
|
|
const PxTGSSolverBodyData& bd03 = *constraintDescs[3].bodyData0;
|
|
|
|
const PxTGSSolverBodyData& bd10 = *constraintDescs[0].bodyData1;
|
|
const PxTGSSolverBodyData& bd11 = *constraintDescs[1].bodyData1;
|
|
const PxTGSSolverBodyData& bd12 = *constraintDescs[2].bodyData1;
|
|
const PxTGSSolverBodyData& bd13 = *constraintDescs[3].bodyData1;
|
|
|
|
//Load up masses, invInertia, velocity etc.
|
|
|
|
const Vec4V invMassScale0 = V4LoadXYZW(constraintDescs[0].invMassScales.linear0, constraintDescs[1].invMassScales.linear0,
|
|
constraintDescs[2].invMassScales.linear0, constraintDescs[3].invMassScales.linear0);
|
|
const Vec4V invMassScale1 = V4LoadXYZW(constraintDescs[0].invMassScales.linear1, constraintDescs[1].invMassScales.linear1,
|
|
constraintDescs[2].invMassScales.linear1, constraintDescs[3].invMassScales.linear1);
|
|
|
|
const Vec4V iMass0 = V4LoadXYZW(bd00.invMass, bd01.invMass, bd02.invMass, bd03.invMass);
|
|
|
|
const Vec4V iMass1 = V4LoadXYZW(bd10.invMass, bd11.invMass, bd12.invMass, bd13.invMass);
|
|
|
|
const Vec4V invMass0 = V4Mul(iMass0, invMassScale0);
|
|
const Vec4V invMass1 = V4Mul(iMass1, invMassScale1);
|
|
|
|
const Vec4V invInertiaScale0 = V4LoadXYZW(constraintDescs[0].invMassScales.angular0, constraintDescs[1].invMassScales.angular0,
|
|
constraintDescs[2].invMassScales.angular0, constraintDescs[3].invMassScales.angular0);
|
|
const Vec4V invInertiaScale1 = V4LoadXYZW(constraintDescs[0].invMassScales.angular1, constraintDescs[1].invMassScales.angular1,
|
|
constraintDescs[2].invMassScales.angular1, constraintDescs[3].invMassScales.angular1);
|
|
|
|
//body world offsets
|
|
Vec4V workOffset0 = V4LoadU(&constraintDescs[0].body0WorldOffset.x);
|
|
Vec4V workOffset1 = V4LoadU(&constraintDescs[1].body0WorldOffset.x);
|
|
Vec4V workOffset2 = V4LoadU(&constraintDescs[2].body0WorldOffset.x);
|
|
Vec4V workOffset3 = V4LoadU(&constraintDescs[3].body0WorldOffset.x);
|
|
|
|
Vec4V workOffsetX, workOffsetY, workOffsetZ;
|
|
|
|
PX_TRANSPOSE_44_34(workOffset0, workOffset1, workOffset2, workOffset3, workOffsetX, workOffsetY, workOffsetZ);
|
|
|
|
const FloatV dtV = FLoad(simDt);
|
|
Vec4V linBreakForce = V4LoadXYZW( constraintDescs[0].linBreakForce, constraintDescs[1].linBreakForce,
|
|
constraintDescs[2].linBreakForce, constraintDescs[3].linBreakForce);
|
|
Vec4V angBreakForce = V4LoadXYZW( constraintDescs[0].angBreakForce, constraintDescs[1].angBreakForce,
|
|
constraintDescs[2].angBreakForce, constraintDescs[3].angBreakForce);
|
|
|
|
header->breakable[0] = PxU8((constraintDescs[0].linBreakForce != PX_MAX_F32) || (constraintDescs[0].angBreakForce != PX_MAX_F32));
|
|
header->breakable[1] = PxU8((constraintDescs[1].linBreakForce != PX_MAX_F32) || (constraintDescs[1].angBreakForce != PX_MAX_F32));
|
|
header->breakable[2] = PxU8((constraintDescs[2].linBreakForce != PX_MAX_F32) || (constraintDescs[2].angBreakForce != PX_MAX_F32));
|
|
header->breakable[3] = PxU8((constraintDescs[3].linBreakForce != PX_MAX_F32) || (constraintDescs[3].angBreakForce != PX_MAX_F32));
|
|
|
|
//OK, I think that's everything loaded in
|
|
|
|
header->invMass0D0 = invMass0;
|
|
header->invMass1D1 = invMass1;
|
|
header->angD0 = invInertiaScale0;
|
|
header->angD1 = invInertiaScale1;
|
|
header->body0WorkOffset[0] = workOffsetX;
|
|
header->body0WorkOffset[1] = workOffsetY;
|
|
header->body0WorkOffset[2] = workOffsetZ;
|
|
|
|
header->count = maxRows;
|
|
header->type = DY_SC_TYPE_BLOCK_1D;
|
|
header->linBreakImpulse = V4Scale(linBreakForce, dtV);
|
|
header->angBreakImpulse = V4Scale(angBreakForce, dtV);
|
|
header->counts[0] = PxTo8(constraintDescs[0].numRows);
|
|
header->counts[1] = PxTo8(constraintDescs[1].numRows);
|
|
header->counts[2] = PxTo8(constraintDescs[2].numRows);
|
|
header->counts[3] = PxTo8(constraintDescs[3].numRows);
|
|
|
|
Vec4V ca2WX, ca2WY, ca2WZ;
|
|
Vec4V cb2WX, cb2WY, cb2WZ;
|
|
|
|
Vec4V ca2W0 = V4LoadU(&constraintDescs[0].cA2w.x);
|
|
Vec4V ca2W1 = V4LoadU(&constraintDescs[1].cA2w.x);
|
|
Vec4V ca2W2 = V4LoadU(&constraintDescs[2].cA2w.x);
|
|
Vec4V ca2W3 = V4LoadU(&constraintDescs[3].cA2w.x);
|
|
|
|
Vec4V cb2W0 = V4LoadU(&constraintDescs[0].cB2w.x);
|
|
Vec4V cb2W1 = V4LoadU(&constraintDescs[1].cB2w.x);
|
|
Vec4V cb2W2 = V4LoadU(&constraintDescs[2].cB2w.x);
|
|
Vec4V cb2W3 = V4LoadU(&constraintDescs[3].cB2w.x);
|
|
|
|
PX_TRANSPOSE_44_34(ca2W0, ca2W1, ca2W2, ca2W3, ca2WX, ca2WY, ca2WZ);
|
|
PX_TRANSPOSE_44_34(cb2W0, cb2W1, cb2W2, cb2W3, cb2WX, cb2WY, cb2WZ);
|
|
|
|
Vec4V pos00 = V4LoadA(&constraintDescs[0].body0TxI->body2WorldP.x);
|
|
Vec4V pos01 = V4LoadA(&constraintDescs[0].body1TxI->body2WorldP.x);
|
|
Vec4V pos10 = V4LoadA(&constraintDescs[1].body0TxI->body2WorldP.x);
|
|
Vec4V pos11 = V4LoadA(&constraintDescs[1].body1TxI->body2WorldP.x);
|
|
Vec4V pos20 = V4LoadA(&constraintDescs[2].body0TxI->body2WorldP.x);
|
|
Vec4V pos21 = V4LoadA(&constraintDescs[2].body1TxI->body2WorldP.x);
|
|
Vec4V pos30 = V4LoadA(&constraintDescs[3].body0TxI->body2WorldP.x);
|
|
Vec4V pos31 = V4LoadA(&constraintDescs[3].body1TxI->body2WorldP.x);
|
|
|
|
Vec4V pos0X, pos0Y, pos0Z;
|
|
Vec4V pos1X, pos1Y, pos1Z;
|
|
|
|
PX_TRANSPOSE_44_34(pos00, pos10, pos20, pos30, pos0X, pos0Y, pos0Z);
|
|
PX_TRANSPOSE_44_34(pos01, pos11, pos21, pos31, pos1X, pos1Y, pos1Z);
|
|
|
|
Vec4V linVel00 = V4LoadA(&constraintDescs[0].bodyData0->originalLinearVelocity.x);
|
|
Vec4V linVel01 = V4LoadA(&constraintDescs[0].bodyData1->originalLinearVelocity.x);
|
|
Vec4V angState00 = V4LoadA(&constraintDescs[0].bodyData0->originalAngularVelocity.x);
|
|
Vec4V angState01 = V4LoadA(&constraintDescs[0].bodyData1->originalAngularVelocity.x);
|
|
|
|
Vec4V linVel10 = V4LoadA(&constraintDescs[1].bodyData0->originalLinearVelocity.x);
|
|
Vec4V linVel11 = V4LoadA(&constraintDescs[1].bodyData1->originalLinearVelocity.x);
|
|
Vec4V angState10 = V4LoadA(&constraintDescs[1].bodyData0->originalAngularVelocity.x);
|
|
Vec4V angState11 = V4LoadA(&constraintDescs[1].bodyData1->originalAngularVelocity.x);
|
|
|
|
Vec4V linVel20 = V4LoadA(&constraintDescs[2].bodyData0->originalLinearVelocity.x);
|
|
Vec4V linVel21 = V4LoadA(&constraintDescs[2].bodyData1->originalLinearVelocity.x);
|
|
Vec4V angState20 = V4LoadA(&constraintDescs[2].bodyData0->originalAngularVelocity.x);
|
|
Vec4V angState21 = V4LoadA(&constraintDescs[2].bodyData1->originalAngularVelocity.x);
|
|
|
|
Vec4V linVel30 = V4LoadA(&constraintDescs[3].bodyData0->originalLinearVelocity.x);
|
|
Vec4V linVel31 = V4LoadA(&constraintDescs[3].bodyData1->originalLinearVelocity.x);
|
|
Vec4V angState30 = V4LoadA(&constraintDescs[3].bodyData0->originalAngularVelocity.x);
|
|
Vec4V angState31 = V4LoadA(&constraintDescs[3].bodyData1->originalAngularVelocity.x);
|
|
|
|
Vec4V linVel0T0, linVel0T1, linVel0T2;
|
|
Vec4V linVel1T0, linVel1T1, linVel1T2;
|
|
Vec4V angState0T0, angState0T1, angState0T2;
|
|
Vec4V angState1T0, angState1T1, angState1T2;
|
|
|
|
PX_TRANSPOSE_44_34(linVel00, linVel10, linVel20, linVel30, linVel0T0, linVel0T1, linVel0T2);
|
|
PX_TRANSPOSE_44_34(linVel01, linVel11, linVel21, linVel31, linVel1T0, linVel1T1, linVel1T2);
|
|
PX_TRANSPOSE_44_34(angState00, angState10, angState20, angState30, angState0T0, angState0T1, angState0T2);
|
|
PX_TRANSPOSE_44_34(angState01, angState11, angState21, angState31, angState1T0, angState1T1, angState1T2);
|
|
|
|
const Vec4V raWorldX = V4Sub(ca2WX, pos0X);
|
|
const Vec4V raWorldY = V4Sub(ca2WY, pos0Y);
|
|
const Vec4V raWorldZ = V4Sub(ca2WZ, pos0Z);
|
|
|
|
const Vec4V rbWorldX = V4Sub(cb2WX, pos1X);
|
|
const Vec4V rbWorldY = V4Sub(cb2WY, pos1Y);
|
|
const Vec4V rbWorldZ = V4Sub(cb2WZ, pos1Z);
|
|
|
|
header->rAWorld[0] = raWorldX;
|
|
header->rAWorld[1] = raWorldY;
|
|
header->rAWorld[2] = raWorldZ;
|
|
|
|
header->rBWorld[0] = rbWorldX;
|
|
header->rBWorld[1] = rbWorldY;
|
|
header->rBWorld[2] = rbWorldZ;
|
|
|
|
//Now we loop over the constraints and build the results...
|
|
|
|
PxU32 index0 = 0;
|
|
PxU32 endIndex0 = constraintDescs[0].numRows - 1;
|
|
PxU32 index1 = startIndex[1];
|
|
PxU32 endIndex1 = index1 + constraintDescs[1].numRows - 1;
|
|
PxU32 index2 = startIndex[2];
|
|
PxU32 endIndex2 = index2 + constraintDescs[2].numRows - 1;
|
|
PxU32 index3 = startIndex[3];
|
|
PxU32 endIndex3 = index3 + constraintDescs[3].numRows - 1;
|
|
|
|
const Vec4V one = V4One();
|
|
|
|
PxU32 orthoCount0 = 0, orthoCount1 = 0, orthoCount2 = 0, orthoCount3 = 0;
|
|
for (PxU32 a = 0; a < 3; ++a)
|
|
{
|
|
header->angOrthoAxis0X[a] = V4Zero();
|
|
header->angOrthoAxis0Y[a] = V4Zero();
|
|
header->angOrthoAxis0Z[a] = V4Zero();
|
|
|
|
header->angOrthoAxis1X[a] = V4Zero();
|
|
header->angOrthoAxis1Y[a] = V4Zero();
|
|
header->angOrthoAxis1Z[a] = V4Zero();
|
|
|
|
header->angOrthoRecipResponse[a] = V4Zero();
|
|
header->angOrthoError[a] = V4Zero();
|
|
}
|
|
|
|
for (PxU32 a = 0; a < maxRows; ++a)
|
|
{
|
|
const bool finished[] = { a >= constraintDescs[0].numRows, a >= constraintDescs[1].numRows, a >= constraintDescs[2].numRows, a >= constraintDescs[3].numRows };
|
|
BoolV bFinished = BLoad(finished);
|
|
SolverConstraint1DStep4* c = reinterpret_cast<SolverConstraint1DStep4*>(currPtr);
|
|
currPtr += stride;
|
|
|
|
Px1DConstraint* con0 = allSorted[index0];
|
|
Px1DConstraint* con1 = allSorted[index1];
|
|
Px1DConstraint* con2 = allSorted[index2];
|
|
Px1DConstraint* con3 = allSorted[index3];
|
|
|
|
const bool angularConstraint[4] =
|
|
{
|
|
!!(con0->flags & Px1DConstraintFlag::eANGULAR_CONSTRAINT),
|
|
!!(con1->flags & Px1DConstraintFlag::eANGULAR_CONSTRAINT),
|
|
!!(con2->flags & Px1DConstraintFlag::eANGULAR_CONSTRAINT),
|
|
!!(con3->flags & Px1DConstraintFlag::eANGULAR_CONSTRAINT),
|
|
};
|
|
|
|
BoolV bAngularConstraint = BLoad(angularConstraint);
|
|
|
|
Vec4V cangDelta00 = V4LoadA(&angSqrtInvInertia0[index0].x);
|
|
Vec4V cangDelta01 = V4LoadA(&angSqrtInvInertia0[index1].x);
|
|
Vec4V cangDelta02 = V4LoadA(&angSqrtInvInertia0[index2].x);
|
|
Vec4V cangDelta03 = V4LoadA(&angSqrtInvInertia0[index3].x);
|
|
|
|
Vec4V cangDelta10 = V4LoadA(&angSqrtInvInertia1[index0].x);
|
|
Vec4V cangDelta11 = V4LoadA(&angSqrtInvInertia1[index1].x);
|
|
Vec4V cangDelta12 = V4LoadA(&angSqrtInvInertia1[index2].x);
|
|
Vec4V cangDelta13 = V4LoadA(&angSqrtInvInertia1[index3].x);
|
|
|
|
index0 = index0 == endIndex0 ? index0 : index0 + 1;
|
|
index1 = index1 == endIndex1 ? index1 : index1 + 1;
|
|
index2 = index2 == endIndex2 ? index2 : index2 + 1;
|
|
index3 = index3 == endIndex3 ? index3 : index3 + 1;
|
|
|
|
PxReal minImpulse0, minImpulse1, minImpulse2, minImpulse3;
|
|
PxReal maxImpulse0, maxImpulse1, maxImpulse2, maxImpulse3;
|
|
computeMinMaxImpulseOrForceAsImpulse(
|
|
con0->minImpulse, con0->maxImpulse,
|
|
con0->flags & Px1DConstraintFlag::eHAS_DRIVE_LIMIT, constraintDescs[0].driveLimitsAreForces, simDt,
|
|
minImpulse0, maxImpulse0);
|
|
computeMinMaxImpulseOrForceAsImpulse(
|
|
con1->minImpulse, con1->maxImpulse,
|
|
con1->flags & Px1DConstraintFlag::eHAS_DRIVE_LIMIT, constraintDescs[1].driveLimitsAreForces, simDt,
|
|
minImpulse1, maxImpulse1);
|
|
computeMinMaxImpulseOrForceAsImpulse(
|
|
con2->minImpulse, con2->maxImpulse,
|
|
con2->flags & Px1DConstraintFlag::eHAS_DRIVE_LIMIT, constraintDescs[2].driveLimitsAreForces, simDt,
|
|
minImpulse2, maxImpulse2);
|
|
computeMinMaxImpulseOrForceAsImpulse(
|
|
con3->minImpulse, con3->maxImpulse,
|
|
con3->flags & Px1DConstraintFlag::eHAS_DRIVE_LIMIT, constraintDescs[3].driveLimitsAreForces, simDt,
|
|
minImpulse3, maxImpulse3);
|
|
const Vec4V minImpulse = V4LoadXYZW(minImpulse0, minImpulse1, minImpulse2, minImpulse3);
|
|
const Vec4V maxImpulse = V4LoadXYZW(maxImpulse0, maxImpulse1, maxImpulse2, maxImpulse3);
|
|
|
|
Vec4V clin00 = V4LoadA(&con0->linear0.x);
|
|
Vec4V clin01 = V4LoadA(&con1->linear0.x);
|
|
Vec4V clin02 = V4LoadA(&con2->linear0.x);
|
|
Vec4V clin03 = V4LoadA(&con3->linear0.x);
|
|
|
|
Vec4V clin0X, clin0Y, clin0Z;
|
|
|
|
PX_TRANSPOSE_44_34(clin00, clin01, clin02, clin03, clin0X, clin0Y, clin0Z);
|
|
|
|
Vec4V cang00 = V4LoadA(&con0->angular0.x);
|
|
Vec4V cang01 = V4LoadA(&con1->angular0.x);
|
|
Vec4V cang02 = V4LoadA(&con2->angular0.x);
|
|
Vec4V cang03 = V4LoadA(&con3->angular0.x);
|
|
|
|
Vec4V cang0X, cang0Y, cang0Z;
|
|
|
|
PX_TRANSPOSE_44_34(cang00, cang01, cang02, cang03, cang0X, cang0Y, cang0Z);
|
|
|
|
Vec4V cang10 = V4LoadA(&con0->angular1.x);
|
|
Vec4V cang11 = V4LoadA(&con1->angular1.x);
|
|
Vec4V cang12 = V4LoadA(&con2->angular1.x);
|
|
Vec4V cang13 = V4LoadA(&con3->angular1.x);
|
|
|
|
Vec4V cang1X, cang1Y, cang1Z;
|
|
|
|
PX_TRANSPOSE_44_34(cang10, cang11, cang12, cang13, cang1X, cang1Y, cang1Z);
|
|
|
|
Vec4V angDelta0X, angDelta0Y, angDelta0Z;
|
|
|
|
PX_TRANSPOSE_44_34(cangDelta00, cangDelta01, cangDelta02, cangDelta03, angDelta0X, angDelta0Y, angDelta0Z);
|
|
|
|
c->flags[0] = 0;
|
|
c->flags[1] = 0;
|
|
c->flags[2] = 0;
|
|
c->flags[3] = 0;
|
|
|
|
c->lin0[0] = V4Sel(bFinished, zero, clin0X);
|
|
c->lin0[1] = V4Sel(bFinished, zero, clin0Y);
|
|
c->lin0[2] = V4Sel(bFinished, zero, clin0Z);
|
|
c->ang0[0] = V4Sel(BAndNot(bAngularConstraint, bFinished), cang0X, zero);
|
|
c->ang0[1] = V4Sel(BAndNot(bAngularConstraint, bFinished), cang0Y, zero);
|
|
c->ang0[2] = V4Sel(BAndNot(bAngularConstraint, bFinished), cang0Z, zero);
|
|
c->angularErrorScale = V4Sel(bAngularConstraint, one, zero);
|
|
|
|
c->minImpulse = minImpulse;
|
|
c->maxImpulse = maxImpulse;
|
|
c->appliedForce = zero;
|
|
if (isResidualReportingEnabled)
|
|
{
|
|
SolverConstraint1DStep4WithResidual* cc = static_cast<SolverConstraint1DStep4WithResidual*>(c);
|
|
cc->residualPosIter = zero;
|
|
cc->residualVelIter = zero;
|
|
}
|
|
|
|
const Vec4V lin0MagSq = V4MulAdd(clin0Z, clin0Z, V4MulAdd(clin0Y, clin0Y, V4Mul(clin0X, clin0X)));
|
|
const Vec4V cang0DotAngDelta = V4MulAdd(angDelta0Z, angDelta0Z, V4MulAdd(angDelta0Y, angDelta0Y, V4Mul(angDelta0X, angDelta0X)));
|
|
|
|
Vec4V unitResponse = V4MulAdd(lin0MagSq, invMass0, V4Mul(cang0DotAngDelta, invInertiaScale0));
|
|
|
|
Vec4V clin10 = V4LoadA(&con0->linear1.x);
|
|
Vec4V clin11 = V4LoadA(&con1->linear1.x);
|
|
Vec4V clin12 = V4LoadA(&con2->linear1.x);
|
|
Vec4V clin13 = V4LoadA(&con3->linear1.x);
|
|
|
|
Vec4V clin1X, clin1Y, clin1Z;
|
|
PX_TRANSPOSE_44_34(clin10, clin11, clin12, clin13, clin1X, clin1Y, clin1Z);
|
|
|
|
Vec4V angDelta1X, angDelta1Y, angDelta1Z;
|
|
|
|
PX_TRANSPOSE_44_34(cangDelta10, cangDelta11, cangDelta12, cangDelta13, angDelta1X, angDelta1Y, angDelta1Z);
|
|
|
|
const Vec4V lin1MagSq = V4MulAdd(clin1Z, clin1Z, V4MulAdd(clin1Y, clin1Y, V4Mul(clin1X, clin1X)));
|
|
const Vec4V cang1DotAngDelta = V4MulAdd(angDelta1Z, angDelta1Z, V4MulAdd(angDelta1Y, angDelta1Y, V4Mul(angDelta1X, angDelta1X)));
|
|
|
|
c->lin1[0] = V4Sel(bFinished, zero, clin1X);
|
|
c->lin1[1] = V4Sel(bFinished, zero, clin1Y);
|
|
c->lin1[2] = V4Sel(bFinished, zero, clin1Z);
|
|
|
|
c->ang1[0] = V4Sel(BAndNot(bAngularConstraint, bFinished), cang1X, zero);
|
|
c->ang1[1] = V4Sel(BAndNot(bAngularConstraint, bFinished), cang1Y, zero);
|
|
c->ang1[2] = V4Sel(BAndNot(bAngularConstraint, bFinished), cang1Z, zero);
|
|
|
|
unitResponse = V4Add(unitResponse, V4MulAdd(lin1MagSq, invMass1, V4Mul(cang1DotAngDelta, invInertiaScale1)));
|
|
|
|
const Vec4V lnormalVel0 = V4MulAdd(clin0X, linVel0T0, V4MulAdd(clin0Y, linVel0T1, V4Mul(clin0Z, linVel0T2)));
|
|
const Vec4V lnormalVel1 = V4MulAdd(clin1X, linVel1T0, V4MulAdd(clin1Y, linVel1T1, V4Mul(clin1Z, linVel1T2)));
|
|
|
|
const Vec4V angVel0 = V4MulAdd(cang0X, angState0T0, V4MulAdd(cang0Y, angState0T1, V4Mul(cang0Z, angState0T2)));
|
|
const Vec4V angVel1 = V4MulAdd(angDelta1X, angState1T0, V4MulAdd(angDelta1Y, angState1T1, V4Mul(angDelta1Z, angState1T2)));
|
|
|
|
const Vec4V normalVel0 = V4Add(lnormalVel0, angVel0);
|
|
const Vec4V normalVel1 = V4Add(lnormalVel1, angVel1);
|
|
|
|
const Vec4V normalVel = V4Sub(normalVel0, normalVel1);
|
|
|
|
angDelta0X = V4Mul(angDelta0X, invInertiaScale0);
|
|
angDelta0Y = V4Mul(angDelta0Y, invInertiaScale0);
|
|
angDelta0Z = V4Mul(angDelta0Z, invInertiaScale0);
|
|
|
|
angDelta1X = V4Mul(angDelta1X, invInertiaScale1);
|
|
angDelta1Y = V4Mul(angDelta1Y, invInertiaScale1);
|
|
angDelta1Z = V4Mul(angDelta1Z, invInertiaScale1);
|
|
|
|
{
|
|
//Constant inputs
|
|
const Px1DConstraint* constraints[4] = {con0, con1, con2, con3};
|
|
const PxReal* unitResponses4 = reinterpret_cast<const PxReal*>(&unitResponse);
|
|
const PxReal* nVel0s4 = reinterpret_cast<const PxReal*>(&normalVel0);
|
|
const PxReal* nVel1s4 = reinterpret_cast<const PxReal*>(&normalVel1);
|
|
const PxReal* jointSpeedForRestitutionBounces4 = reinterpret_cast<const PxReal*>(&normalVel);
|
|
const bool isBody0Kinematics[4] = {isKinematic00, isKinematic10, isKinematic20, isKinematic30};
|
|
const bool isBody1Kinematics[4] = {isKinematic01, isKinematic11, isKinematic21, isKinematic31};
|
|
|
|
//outputs
|
|
PxReal* biasScales4 = reinterpret_cast<PxReal*>(&c->biasScale);
|
|
PxReal* errors4 = reinterpret_cast<PxReal*>(&c->error);
|
|
PxReal* velMultipliers4 = reinterpret_cast<PxReal*>(&c->velMultiplier);
|
|
PxReal* targetScales4 = reinterpret_cast<PxReal*>(&c->velTarget);
|
|
PxReal* maxBiasSpeeds4 = reinterpret_cast<PxReal*>(&c->maxBias);
|
|
PxReal recipResponses[4];
|
|
PxReal originalError[4];
|
|
|
|
for(PxU32 i = 0; i < 4; i++)
|
|
{
|
|
|
|
if(a < constraintDescs[i].numRows)
|
|
{
|
|
const PxReal minRowResponseI = constraintDescs[i].minResponseThreshold;
|
|
const PxU16 constraintFlagsI = constraints[i]->flags;
|
|
const PxReal stiffnessI = constraints[i]->mods.spring.stiffness;
|
|
const PxReal dampingI = constraints[i]->mods.spring.damping;
|
|
const PxReal restitutionI = constraints[i]->mods.bounce.restitution;
|
|
const PxReal bounceThresholdVelocityI = constraints[i]->mods.bounce.velocityThreshold;
|
|
const PxReal geometricErrorI = constraints[i]->geometricError;
|
|
originalError[i] = geometricErrorI;
|
|
|
|
const PxReal velocityTargetI = constraints[i]->velocityTarget;
|
|
const PxReal jointSpeedForRestitutionBounceI = jointSpeedForRestitutionBounces4[i];
|
|
const PxReal unitResponseI = unitResponses4[i];
|
|
|
|
const PxReal recipUnitResponseI = computeRecipUnitResponse(unitResponseI, minRowResponseI);
|
|
recipResponses[i] = recipUnitResponseI;
|
|
|
|
const PxReal maxBiasVelocityI = computeMaxBiasVelocityTGS(
|
|
constraintFlagsI,
|
|
jointSpeedForRestitutionBounceI, bounceThresholdVelocityI,
|
|
restitutionI, geometricErrorI,
|
|
false, lengthScale, recipSimDt);
|
|
|
|
PxReal initJointSpeedI = 0.f;
|
|
if (isBody0Kinematics[i])
|
|
initJointSpeedI -= nVel0s4[i];
|
|
if (isBody1Kinematics[i])
|
|
initJointSpeedI += nVel1s4[i];
|
|
|
|
const Constraint1dSolverConstantsTGS desc = compute1dConstraintSolverConstantsTGS(
|
|
constraintFlagsI,
|
|
stiffnessI, dampingI,
|
|
restitutionI, bounceThresholdVelocityI,
|
|
geometricErrorI, velocityTargetI,
|
|
jointSpeedForRestitutionBounceI, initJointSpeedI,
|
|
unitResponseI, recipUnitResponseI,
|
|
erp,
|
|
stepDt, recipStepDt);
|
|
|
|
biasScales4[i] = desc.biasScale;
|
|
errors4[i] = desc.error;
|
|
velMultipliers4[i] = desc.velMultiplier;
|
|
targetScales4[i] = desc.targetVel;
|
|
maxBiasSpeeds4[i] = maxBiasVelocityI;
|
|
}
|
|
else
|
|
{
|
|
biasScales4[i] = 0.0f;
|
|
errors4[i] = 0.0f;
|
|
velMultipliers4[i] = 0.0f;
|
|
targetScales4[i] = 0.0f;
|
|
maxBiasSpeeds4[i] = 0.0f;
|
|
}
|
|
|
|
raiseInternalFlagsTGS(constraints[i]->flags, constraints[i]->solveHint, c->flags[i]);
|
|
}
|
|
|
|
PxVec4* angOrthoAxes0X = reinterpret_cast<PxVec4*>(header->angOrthoAxis0X);
|
|
PxVec4* angOrthoAxes0Y = reinterpret_cast<PxVec4*>(header->angOrthoAxis0Y);
|
|
PxVec4* angOrthoAxes0Z = reinterpret_cast<PxVec4*>(header->angOrthoAxis0Z);
|
|
PxVec4* angOrthoAxes1X = reinterpret_cast<PxVec4*>(header->angOrthoAxis1X);
|
|
PxVec4* angOrthoAxes1Y = reinterpret_cast<PxVec4*>(header->angOrthoAxis1Y);
|
|
PxVec4* angOrthoAxes1Z = reinterpret_cast<PxVec4*>(header->angOrthoAxis1Z);
|
|
PxVec4* orthoRecipResponse = reinterpret_cast<PxVec4*>(header->angOrthoRecipResponse);
|
|
PxVec4* orthoError = reinterpret_cast<PxVec4*>(header->angOrthoError);
|
|
|
|
const PxVec4& ang0X = reinterpret_cast<const PxVec4&>(angDelta0X);
|
|
const PxVec4& ang0Y = reinterpret_cast<const PxVec4&>(angDelta0Y);
|
|
const PxVec4& ang0Z = reinterpret_cast<const PxVec4&>(angDelta0Z);
|
|
|
|
const PxVec4& ang1X = reinterpret_cast<const PxVec4&>(angDelta1X);
|
|
const PxVec4& ang1Y = reinterpret_cast<const PxVec4&>(angDelta1Y);
|
|
const PxVec4& ang1Z = reinterpret_cast<const PxVec4&>(angDelta1Z);
|
|
|
|
setOrthoData(
|
|
ang0X.x, ang0Y.x, ang0Z.x, ang1X.x, ang1Y.x, ang1Z.x,
|
|
recipResponses[0], originalError[0],
|
|
angOrthoAxes0X[orthoCount0].x, angOrthoAxes0Y[orthoCount0].x, angOrthoAxes0Z[orthoCount0].x,
|
|
angOrthoAxes1X[orthoCount0].x, angOrthoAxes1Y[orthoCount0].x, angOrthoAxes1Z[orthoCount0].x,
|
|
orthoRecipResponse[orthoCount0].x, orthoError[orthoCount0].x,
|
|
constraintDescs[0].disablePreprocessing, con0->solveHint,
|
|
c->flags[0], orthoCount0, a >= constraintDescs[0].numRows);
|
|
|
|
setOrthoData(
|
|
ang0X.y, ang0Y.y, ang0Z.y, ang1X.y, ang1Y.y, ang1Z.y,
|
|
recipResponses[1], originalError[1],
|
|
angOrthoAxes0X[orthoCount1].y, angOrthoAxes0Y[orthoCount1].y, angOrthoAxes0Z[orthoCount1].y,
|
|
angOrthoAxes1X[orthoCount1].y, angOrthoAxes1Y[orthoCount1].y, angOrthoAxes1Z[orthoCount1].y,
|
|
orthoRecipResponse[orthoCount1].y, orthoError[orthoCount1].y,
|
|
constraintDescs[1].disablePreprocessing, con1->solveHint,
|
|
c->flags[1], orthoCount1, a >= constraintDescs[1].numRows);
|
|
|
|
setOrthoData(
|
|
ang0X.z, ang0Y.z, ang0Z.z, ang1X.z, ang1Y.z, ang1Z.z,
|
|
recipResponses[2], originalError[2],
|
|
angOrthoAxes0X[orthoCount2].z, angOrthoAxes0Y[orthoCount2].z, angOrthoAxes0Z[orthoCount2].z,
|
|
angOrthoAxes1X[orthoCount2].z, angOrthoAxes1Y[orthoCount2].z, angOrthoAxes1Z[orthoCount2].z,
|
|
orthoRecipResponse[orthoCount2].z, orthoError[orthoCount2].z,
|
|
constraintDescs[2].disablePreprocessing, con2->solveHint,
|
|
c->flags[2], orthoCount2, a >= constraintDescs[2].numRows);
|
|
|
|
setOrthoData(
|
|
ang0X.w, ang0Y.w, ang0Z.w, ang1X.w, ang1Y.w, ang1Z.w,
|
|
recipResponses[3], originalError[3],
|
|
angOrthoAxes0X[orthoCount3].w, angOrthoAxes0Y[orthoCount3].w, angOrthoAxes0Z[orthoCount3].w,
|
|
angOrthoAxes1X[orthoCount3].w, angOrthoAxes1Y[orthoCount3].w, angOrthoAxes1Z[orthoCount3].w,
|
|
orthoRecipResponse[orthoCount3].w, orthoError[orthoCount3].w,
|
|
constraintDescs[3].disablePreprocessing, con3->solveHint,
|
|
c->flags[3], orthoCount3, a >= constraintDescs[3].numRows);
|
|
}
|
|
}
|
|
*(reinterpret_cast<PxU32*>(currPtr)) = 0;
|
|
*(reinterpret_cast<PxU32*>(currPtr + 4)) = 0;
|
|
}
|
|
|
|
//OK, we're ready to allocate and solve prep these constraints now :-)
|
|
return SolverConstraintPrepState::eSUCCESS;
|
|
}
|
|
|
|
// Solves a batch of four rigid-body contact constraints together, one constraint per SIMD lane.
// The velocities of the eight referenced bodies are loaded, iterated on in transposed (per-component)
// form while walking the constraint stream, and stored back at the end.
//
// desc            Array of four constraint descriptors. desc[k].tgsBodyA / tgsBodyB supply the bodies of
//                 lane k; the constraint stream and its length are taken from desc[0] only.
// doFriction      When false the friction rows are not solved (their data is still stepped over).
// minPenetration  Lower clamp applied to each contact's updated separation.
// elapsedTimeF32  Multiplied with each row's target velocity when updating the positional error.
// cache           Only cache.contactErrorAccumulator is used: when it converts to true, per-row impulse
//                 changes are accumulated locally and merged into it at the end.
static void solveContact4_Block(const PxSolverConstraintDesc* PX_RESTRICT desc, const bool doFriction, const PxReal minPenetration,
|
|
const PxReal elapsedTimeF32, SolverContext& cache)
|
|
{
|
|
// Naming: bXY is body Y (0 = tgsBodyA, 1 = tgsBodyB) of constraint X in the batch.
PxTGSSolverBodyVel& b00 = *desc[0].tgsBodyA;
|
|
PxTGSSolverBodyVel& b01 = *desc[0].tgsBodyB;
|
|
PxTGSSolverBodyVel& b10 = *desc[1].tgsBodyA;
|
|
PxTGSSolverBodyVel& b11 = *desc[1].tgsBodyB;
|
|
PxTGSSolverBodyVel& b20 = *desc[2].tgsBodyA;
|
|
PxTGSSolverBodyVel& b21 = *desc[2].tgsBodyB;
|
|
PxTGSSolverBodyVel& b30 = *desc[3].tgsBodyA;
|
|
PxTGSSolverBodyVel& b31 = *desc[3].tgsBodyB;
|
|
|
|
// Broadcast the scalar parameters to all four lanes.
const Vec4V minPen = V4Load(minPenetration);
|
|
|
|
const Vec4V elapsedTime = V4Load(elapsedTimeF32);
|
|
|
|
//We'll need this.
|
|
const Vec4V vZero = V4Zero();
|
|
|
|
// Load linear velocity and angular state of both bodies of each constraint (one Vec4V per body).
Vec4V linVel00 = V4LoadA(&b00.linearVelocity.x);
|
|
Vec4V linVel01 = V4LoadA(&b01.linearVelocity.x);
|
|
Vec4V angState00 = V4LoadA(&b00.angularVelocity.x);
|
|
Vec4V angState01 = V4LoadA(&b01.angularVelocity.x);
|
|
|
|
Vec4V linVel10 = V4LoadA(&b10.linearVelocity.x);
|
|
Vec4V linVel11 = V4LoadA(&b11.linearVelocity.x);
|
|
Vec4V angState10 = V4LoadA(&b10.angularVelocity.x);
|
|
Vec4V angState11 = V4LoadA(&b11.angularVelocity.x);
|
|
|
|
Vec4V linVel20 = V4LoadA(&b20.linearVelocity.x);
|
|
Vec4V linVel21 = V4LoadA(&b21.linearVelocity.x);
|
|
Vec4V angState20 = V4LoadA(&b20.angularVelocity.x);
|
|
Vec4V angState21 = V4LoadA(&b21.angularVelocity.x);
|
|
|
|
Vec4V linVel30 = V4LoadA(&b30.linearVelocity.x);
|
|
Vec4V linVel31 = V4LoadA(&b31.linearVelocity.x);
|
|
Vec4V angState30 = V4LoadA(&b30.angularVelocity.x);
|
|
Vec4V angState31 = V4LoadA(&b31.angularVelocity.x);
|
|
|
|
// Transposed state: <name><body>T<component>, each Vec4V holding one component for all four constraints.
// The T3 vectors carry the fourth loaded component through unchanged so it can be transposed back later.
Vec4V linVel0T0, linVel0T1, linVel0T2, linVel0T3;
|
|
Vec4V linVel1T0, linVel1T1, linVel1T2, linVel1T3;
|
|
Vec4V angState0T0, angState0T1, angState0T2, angState0T3;
|
|
Vec4V angState1T0, angState1T1, angState1T2, angState1T3;
|
|
|
|
PX_TRANSPOSE_44(linVel00, linVel10, linVel20, linVel30, linVel0T0, linVel0T1, linVel0T2, linVel0T3);
|
|
PX_TRANSPOSE_44(linVel01, linVel11, linVel21, linVel31, linVel1T0, linVel1T1, linVel1T2, linVel1T3);
|
|
PX_TRANSPOSE_44(angState00, angState10, angState20, angState30, angState0T0, angState0T1, angState0T2, angState0T3);
|
|
PX_TRANSPOSE_44(angState01, angState11, angState21, angState31, angState1T0, angState1T1, angState1T2, angState1T3);
|
|
|
|
// Load each body's deltaLinDt / deltaAngDt. These are read-only in this function.
Vec4V linDelta00_ = V4LoadA(&b00.deltaLinDt.x);
|
|
Vec4V linDelta01_ = V4LoadA(&b01.deltaLinDt.x);
|
|
Vec4V angDelta00_ = V4LoadA(&b00.deltaAngDt.x);
|
|
Vec4V angDelta01_ = V4LoadA(&b01.deltaAngDt.x);
|
|
|
|
Vec4V linDelta10_ = V4LoadA(&b10.deltaLinDt.x);
|
|
Vec4V linDelta11_ = V4LoadA(&b11.deltaLinDt.x);
|
|
Vec4V angDelta10_ = V4LoadA(&b10.deltaAngDt.x);
|
|
Vec4V angDelta11_ = V4LoadA(&b11.deltaAngDt.x);
|
|
|
|
Vec4V linDelta20_ = V4LoadA(&b20.deltaLinDt.x);
|
|
Vec4V linDelta21_ = V4LoadA(&b21.deltaLinDt.x);
|
|
Vec4V angDelta20_ = V4LoadA(&b20.deltaAngDt.x);
|
|
Vec4V angDelta21_ = V4LoadA(&b21.deltaAngDt.x);
|
|
|
|
Vec4V linDelta30_ = V4LoadA(&b30.deltaLinDt.x);
|
|
Vec4V linDelta31_ = V4LoadA(&b31.deltaLinDt.x);
|
|
Vec4V angDelta30_ = V4LoadA(&b30.deltaAngDt.x);
|
|
Vec4V angDelta31_ = V4LoadA(&b31.deltaAngDt.x);
|
|
|
|
// Only three transposed components are kept for the deltas (no fourth output vector).
Vec4V linDelta0T0, linDelta0T1, linDelta0T2;
|
|
Vec4V linDelta1T0, linDelta1T1, linDelta1T2;
|
|
Vec4V angDelta0T0, angDelta0T1, angDelta0T2;
|
|
Vec4V angDelta1T0, angDelta1T1, angDelta1T2;
|
|
|
|
PX_TRANSPOSE_44_34(linDelta00_, linDelta10_, linDelta20_, linDelta30_, linDelta0T0, linDelta0T1, linDelta0T2);
|
|
PX_TRANSPOSE_44_34(linDelta01_, linDelta11_, linDelta21_, linDelta31_, linDelta1T0, linDelta1T1, linDelta1T2);
|
|
PX_TRANSPOSE_44_34(angDelta00_, angDelta10_, angDelta20_, angDelta30_, angDelta0T0, angDelta0T1, angDelta0T2);
|
|
PX_TRANSPOSE_44_34(angDelta01_, angDelta11_, angDelta21_, angDelta31_, angDelta1T0, angDelta1T1, angDelta1T2);
|
|
|
|
// End of the constraint stream; the whole batch is described by desc[0]'s stream.
const PxU8* PX_RESTRICT last = desc[0].constraint + getConstraintLength(desc[0]);
|
|
|
|
//hopefully pointer aliasing doesn't bite.
|
|
PxU8* PX_RESTRICT currPtr = desc[0].constraint;
|
|
|
|
// Stand-in max impulse used for headers that carry no per-contact max-impulse array.
Vec4V vMax = V4Splat(FMax());
|
|
|
|
SolverContactHeaderStepBlock* PX_RESTRICT hdr = reinterpret_cast<SolverContactHeaderStepBlock*>(currPtr);
|
|
|
|
// Inverse masses are read once from the first header and reused for every header in the stream.
const Vec4V invMassA = hdr->invMass0D0;
|
|
const Vec4V invMassB = hdr->invMass1D1;
|
|
|
|
const Vec4V sumInvMass = V4Add(invMassA, invMassB);
|
|
|
|
// Relative linear delta (body A minus body B), per component.
Vec4V linDeltaX = V4Sub(linDelta0T0, linDelta1T0);
|
|
Vec4V linDeltaY = V4Sub(linDelta0T1, linDelta1T1);
|
|
Vec4V linDeltaZ = V4Sub(linDelta0T2, linDelta1T2);
|
|
|
|
Dy::ErrorAccumulator error;
|
|
const bool residualReportingActive = cache.contactErrorAccumulator;
|
|
|
|
// Each iteration consumes one header and the data that follows it, in this order:
// applied normal forces, contact points, optional max impulses, friction applied forces, friction rows.
while (currPtr < last)
|
|
{
|
|
hdr = reinterpret_cast<SolverContactHeaderStepBlock*>(currPtr);
|
|
|
|
PX_ASSERT(hdr->type == DY_SC_TYPE_BLOCK_RB_CONTACT);
|
|
|
|
currPtr = reinterpret_cast<PxU8*>(const_cast<SolverContactHeaderStepBlock*>(hdr) + 1);
|
|
|
|
const PxU32 numNormalConstr = hdr->numNormalConstr;
|
|
const PxU32 numFrictionConstr = hdr->numFrictionConstr;
|
|
|
|
const bool hasMaxImpulse = (hdr->flag & SolverContactHeaderStepBlock::eHAS_MAX_IMPULSE) != 0;
|
|
|
|
Vec4V* appliedForces = reinterpret_cast<Vec4V*>(currPtr);
|
|
currPtr += sizeof(Vec4V)*numNormalConstr;
|
|
|
|
SolverContactPointStepBlock* PX_RESTRICT contacts = reinterpret_cast<SolverContactPointStepBlock*>(currPtr);
|
|
|
|
Vec4V* maxImpulses;
|
|
currPtr = reinterpret_cast<PxU8*>(contacts + numNormalConstr);
|
|
// The mask is ANDed with the contact index below: all ones keeps the index, zero forces index 0 (vMax).
PxU32 maxImpulseMask = 0;
|
|
if (hasMaxImpulse)
|
|
{
|
|
maxImpulseMask = 0xFFFFFFFF;
|
|
maxImpulses = reinterpret_cast<Vec4V*>(currPtr);
|
|
currPtr += sizeof(Vec4V) * numNormalConstr;
|
|
}
|
|
else
|
|
{
|
|
maxImpulses = &vMax;
|
|
}
|
|
|
|
|
|
/*SolverFrictionSharedData4* PX_RESTRICT fd = reinterpret_cast<SolverFrictionSharedData4*>(currPtr);
|
|
if (numFrictionConstr)
|
|
currPtr += sizeof(SolverFrictionSharedData4);*/
|
|
|
|
|
|
Vec4V* frictionAppliedForce = reinterpret_cast<Vec4V*>(currPtr);
|
|
currPtr += sizeof(Vec4V)*numFrictionConstr;
|
|
|
|
const SolverContactFrictionStepBlock* PX_RESTRICT frictions = reinterpret_cast<SolverContactFrictionStepBlock*>(currPtr);
|
|
currPtr += numFrictionConstr * sizeof(SolverContactFrictionStepBlock);
|
|
|
|
// Sum of the final applied normal forces of this header; used for the friction limits below.
Vec4V accumulatedNormalImpulse = vZero;
|
|
|
|
const Vec4V angD0 = hdr->angDom0;
|
|
const Vec4V angD1 = hdr->angDom1;
|
|
|
|
const Vec4V _normalT0 = hdr->normalX;
|
|
const Vec4V _normalT1 = hdr->normalY;
|
|
const Vec4V _normalT2 = hdr->normalZ;
|
|
|
|
// Linear velocity of each body projected onto the header's normal (dot product, built up per component).
Vec4V contactNormalVel1 = V4Mul(linVel0T0, _normalT0);
|
|
Vec4V contactNormalVel3 = V4Mul(linVel1T0, _normalT0);
|
|
contactNormalVel1 = V4MulAdd(linVel0T1, _normalT1, contactNormalVel1);
|
|
contactNormalVel3 = V4MulAdd(linVel1T1, _normalT1, contactNormalVel3);
|
|
contactNormalVel1 = V4MulAdd(linVel0T2, _normalT2, contactNormalVel1);
|
|
contactNormalVel3 = V4MulAdd(linVel1T2, _normalT2, contactNormalVel3);
|
|
|
|
const Vec4V maxPenBias = hdr->maxPenBias;
|
|
|
|
// Linear part of the relative normal velocity; kept up to date inside the contact loop.
Vec4V relVel1 = V4Sub(contactNormalVel1, contactNormalVel3);
|
|
|
|
// Relative linear delta projected onto the normal; identical for every contact of this header.
Vec4V deltaNormalV = V4Mul(linDeltaX, _normalT0);
|
|
deltaNormalV = V4MulAdd(linDeltaY, _normalT1, deltaNormalV);
|
|
deltaNormalV = V4MulAdd(linDeltaZ, _normalT2, deltaNormalV);
|
|
|
|
// Sum of the normal impulse changes; applied to the linear velocities once, after the loop.
Vec4V accumDeltaF = vZero;
|
|
|
|
for (PxU32 i = 0; i<numNormalConstr; i++)
|
|
{
|
|
const SolverContactPointStepBlock& c = contacts[i];
|
|
|
|
/*PxU32 offset = 0;
|
|
PxPrefetchLine(prefetchAddress, offset += 64);
|
|
PxPrefetchLine(prefetchAddress, offset += 64);
|
|
PxPrefetchLine(prefetchAddress, offset += 64);
|
|
prefetchAddress += offset;*/
|
|
|
|
const Vec4V appliedForce = appliedForces[i];
|
|
const Vec4V maxImpulse = maxImpulses[i & maxImpulseMask];
|
|
|
|
// Angular contribution to the normal velocity of each body (raXnI / rbXnI dotted with the angular state).
Vec4V contactNormalVel2 = V4Mul(c.raXnI[0], angState0T0);
|
|
Vec4V contactNormalVel4 = V4Mul(c.rbXnI[0], angState1T0);
|
|
|
|
contactNormalVel2 = V4MulAdd(c.raXnI[1], angState0T1, contactNormalVel2);
|
|
contactNormalVel4 = V4MulAdd(c.rbXnI[1], angState1T1, contactNormalVel4);
|
|
|
|
contactNormalVel2 = V4MulAdd(c.raXnI[2], angState0T2, contactNormalVel2);
|
|
contactNormalVel4 = V4MulAdd(c.rbXnI[2], angState1T2, contactNormalVel4);
|
|
|
|
const Vec4V normalVel = V4Add(relVel1, V4Sub(contactNormalVel2, contactNormalVel4));
|
|
|
|
// Same projection applied to the angular deltas of both bodies.
Vec4V angDelta0 = V4Mul(angDelta0T0, c.raXnI[0]);
|
|
Vec4V angDelta1 = V4Mul(angDelta1T0, c.rbXnI[0]);
|
|
angDelta0 = V4MulAdd(angDelta0T1, c.raXnI[1], angDelta0);
|
|
angDelta1 = V4MulAdd(angDelta1T1, c.rbXnI[1], angDelta1);
|
|
angDelta0 = V4MulAdd(angDelta0T2, c.raXnI[2], angDelta0);
|
|
angDelta1 = V4MulAdd(angDelta1T2, c.rbXnI[2], angDelta1);
|
|
|
|
const Vec4V deltaAng = V4Sub(angDelta0, angDelta1);
|
|
|
|
const Vec4V targetVel = c.targetVelocity;
|
|
|
|
// Change in separation: projected linear + angular deltas, minus the target velocity scaled by elapsedTime.
const Vec4V deltaBias = V4Sub(V4Add(deltaNormalV, deltaAng), V4Mul(targetVel, elapsedTime));
|
|
//const Vec4V deltaBias = V4Add(deltaNormalV, deltaAng);
|
|
|
|
const Vec4V biasCoefficient = c.biasCoefficient;
|
|
|
|
// Updated separation, clamped from below by minPen.
const Vec4V sep = V4Max(minPen, V4Add(c.separation, deltaBias));
|
|
|
|
// Bias is limited from above by -maxPenBias.
const Vec4V bias = V4Min(V4Neg(maxPenBias), V4Mul(biasCoefficient, sep));
|
|
|
|
const Vec4V velMultiplier = c.velMultiplier;
|
|
|
|
const Vec4V tVelBias = V4Mul(bias, c.recipResponse);
|
|
|
|
// Lower clamp at -appliedForce: the accumulated normal force cannot become negative.
const Vec4V _deltaF = V4Max(V4Sub(tVelBias, V4Mul(V4Sub(normalVel, targetVel), velMultiplier)), V4Neg(appliedForce));
|
|
//Vec4V deltaF = V4NegMulSub(normalVel, c.velMultiplier, c.biasedErr);
|
|
|
|
// Upper clamp at maxImpulse; deltaF is the change that is actually applied after both clamps.
const Vec4V newAppliedForce = V4Min(V4Add(appliedForce, _deltaF), maxImpulse);
|
|
const Vec4V deltaF = V4Sub(newAppliedForce, appliedForce);
|
|
|
|
if (residualReportingActive)
|
|
error.accumulateErrorLocalV4(deltaF, velMultiplier);
|
|
|
|
accumDeltaF = V4Add(accumDeltaF, deltaF);
|
|
|
|
const Vec4V angDetaF0 = V4Mul(deltaF, angD0);
|
|
const Vec4V angDetaF1 = V4Mul(deltaF, angD1);
|
|
|
|
// Update the running relative normal velocity now, so the next contact sees this impulse;
// the linear body velocities themselves are updated after the loop from accumDeltaF.
relVel1 = V4MulAdd(sumInvMass, deltaF, relVel1);
|
|
|
|
// Apply the impulse to the angular state of both bodies (MulAdd for body A, NegMulSub for body B).
angState0T0 = V4MulAdd(c.raXnI[0], angDetaF0, angState0T0);
|
|
angState1T0 = V4NegMulSub(c.rbXnI[0], angDetaF1, angState1T0);
|
|
|
|
angState0T1 = V4MulAdd(c.raXnI[1], angDetaF0, angState0T1);
|
|
angState1T1 = V4NegMulSub(c.rbXnI[1], angDetaF1, angState1T1);
|
|
|
|
angState0T2 = V4MulAdd(c.raXnI[2], angDetaF0, angState0T2);
|
|
angState1T2 = V4NegMulSub(c.rbXnI[2], angDetaF1, angState1T2);
|
|
|
|
// Persist the accumulated force in the constraint stream.
appliedForces[i] = newAppliedForce;
|
|
|
|
accumulatedNormalImpulse = V4Add(accumulatedNormalImpulse, newAppliedForce);
|
|
}
|
|
|
|
// Apply the summed normal impulse change to the linear velocities of both bodies along the normal.
const Vec4V accumDeltaF_IM0 = V4Mul(accumDeltaF, invMassA);
|
|
const Vec4V accumDeltaF_IM1 = V4Mul(accumDeltaF, invMassB);
|
|
|
|
linVel0T0 = V4MulAdd(_normalT0, accumDeltaF_IM0, linVel0T0);
|
|
linVel1T0 = V4NegMulSub(_normalT0, accumDeltaF_IM1, linVel1T0);
|
|
linVel0T1 = V4MulAdd(_normalT1, accumDeltaF_IM0, linVel0T1);
|
|
linVel1T1 = V4NegMulSub(_normalT1, accumDeltaF_IM1, linVel1T1);
|
|
linVel0T2 = V4MulAdd(_normalT2, accumDeltaF_IM0, linVel0T2);
|
|
linVel1T2 = V4NegMulSub(_normalT2, accumDeltaF_IM1, linVel1T2);
|
|
|
|
if (doFriction && numFrictionConstr)
|
|
{
|
|
const Vec4V staticFric = hdr->staticFriction;
|
|
const Vec4V dynamicFric = hdr->dynamicFriction;
|
|
|
|
// Friction limits scale with the accumulated normal impulse; the static limit gets a small 1e-5 offset.
const Vec4V maxFrictionImpulse = V4Add(V4Mul(staticFric, accumulatedNormalImpulse), V4Load(1e-5f));
|
|
const Vec4V maxDynFrictionImpulse = V4Mul(dynamicFric, accumulatedNormalImpulse);
|
|
// Per-lane flag; once set for a lane it stays set for the remaining friction pairs of this header.
BoolV broken = BFFFF();
|
|
|
|
// Friction rows are solved in pairs (i, i+1) whose combined impulse magnitude is limited together.
// NOTE(review): frictions[i+1] is read unconditionally, so this presumably relies on
// numFrictionConstr being even for this solver - confirm against the prep code.
for (PxU32 i = 0; i<numFrictionConstr; i+=2)
|
|
{
|
|
const SolverContactFrictionStepBlock& f0 = frictions[i];
|
|
const SolverContactFrictionStepBlock& f1 = frictions[i+1];
|
|
|
|
/*PxU32 offset = 0;
|
|
PxPrefetchLine(prefetchAddress, offset += 64);
|
|
PxPrefetchLine(prefetchAddress, offset += 64);
|
|
PxPrefetchLine(prefetchAddress, offset += 64);
|
|
PxPrefetchLine(prefetchAddress, offset += 64);
|
|
prefetchAddress += offset;*/
|
|
|
|
const Vec4V appliedForce0 = frictionAppliedForce[i];
|
|
const Vec4V appliedForce1 = frictionAppliedForce[i+1];
|
|
|
|
// Naming: normalT<component><row>, row 0 = f0 and row 1 = f1.
const Vec4V normalT00 = f0.normal[0];
|
|
const Vec4V normalT10 = f0.normal[1];
|
|
const Vec4V normalT20 = f0.normal[2];
|
|
|
|
const Vec4V normalT01 = f1.normal[0];
|
|
const Vec4V normalT11 = f1.normal[1];
|
|
const Vec4V normalT21 = f1.normal[2];
|
|
|
|
// Velocity along each friction direction: normalVel<k><row> with k = 1 linear A, 2 angular A,
// 3 linear B, 4 angular B. The three groups below accumulate the x, y and z terms in turn.
Vec4V normalVel10 = V4Mul(linVel0T0, normalT00);
|
|
Vec4V normalVel20 = V4Mul(f0.raXnI[0], angState0T0);
|
|
Vec4V normalVel30 = V4Mul(linVel1T0, normalT00);
|
|
Vec4V normalVel40 = V4Mul(f0.rbXnI[0], angState1T0);
|
|
Vec4V normalVel11 = V4Mul(linVel0T0, normalT01);
|
|
Vec4V normalVel21 = V4Mul(f1.raXnI[0], angState0T0);
|
|
Vec4V normalVel31 = V4Mul(linVel1T0, normalT01);
|
|
Vec4V normalVel41 = V4Mul(f1.rbXnI[0], angState1T0);
|
|
|
|
normalVel10 = V4MulAdd(linVel0T1, normalT10, normalVel10);
|
|
normalVel20 = V4MulAdd(f0.raXnI[1], angState0T1, normalVel20);
|
|
normalVel30 = V4MulAdd(linVel1T1, normalT10, normalVel30);
|
|
normalVel40 = V4MulAdd(f0.rbXnI[1], angState1T1, normalVel40);
|
|
normalVel11 = V4MulAdd(linVel0T1, normalT11, normalVel11);
|
|
normalVel21 = V4MulAdd(f1.raXnI[1], angState0T1, normalVel21);
|
|
normalVel31 = V4MulAdd(linVel1T1, normalT11, normalVel31);
|
|
normalVel41 = V4MulAdd(f1.rbXnI[1], angState1T1, normalVel41);
|
|
|
|
normalVel10 = V4MulAdd(linVel0T2, normalT20, normalVel10);
|
|
normalVel20 = V4MulAdd(f0.raXnI[2], angState0T2, normalVel20);
|
|
normalVel30 = V4MulAdd(linVel1T2, normalT20, normalVel30);
|
|
normalVel40 = V4MulAdd(f0.rbXnI[2], angState1T2, normalVel40);
|
|
normalVel11 = V4MulAdd(linVel0T2, normalT21, normalVel11);
|
|
normalVel21 = V4MulAdd(f1.raXnI[2], angState0T2, normalVel21);
|
|
normalVel31 = V4MulAdd(linVel1T2, normalT21, normalVel31);
|
|
normalVel41 = V4MulAdd(f1.rbXnI[2], angState1T2, normalVel41);
|
|
|
|
// Relative velocity along each friction direction: (body A) - (body B).
const Vec4V normalVel0_tmp1 = V4Add(normalVel10, normalVel20);
|
|
const Vec4V normalVel0_tmp2 = V4Add(normalVel30, normalVel40);
|
|
const Vec4V normalVel0 = V4Sub(normalVel0_tmp1, normalVel0_tmp2);
|
|
const Vec4V normalVel1_tmp1 = V4Add(normalVel11, normalVel21);
|
|
const Vec4V normalVel1_tmp2 = V4Add(normalVel31, normalVel41);
|
|
const Vec4V normalVel1 = V4Sub(normalVel1_tmp1, normalVel1_tmp2);
|
|
|
|
// Relative linear delta projected onto each friction direction.
Vec4V deltaV0 = V4Mul(linDeltaX, normalT00);
|
|
deltaV0 = V4MulAdd(linDeltaY, normalT10, deltaV0);
|
|
deltaV0 = V4MulAdd(linDeltaZ, normalT20, deltaV0);
|
|
Vec4V deltaV1 = V4Mul(linDeltaX, normalT01);
|
|
deltaV1 = V4MulAdd(linDeltaY, normalT11, deltaV1);
|
|
deltaV1 = V4MulAdd(linDeltaZ, normalT21, deltaV1);
|
|
|
|
// Angular deltas projected for row f0 ...
Vec4V angDelta00 = V4Mul(angDelta0T0, f0.raXnI[0]);
|
|
Vec4V angDelta10 = V4Mul(angDelta1T0, f0.rbXnI[0]);
|
|
angDelta00 = V4MulAdd(angDelta0T1, f0.raXnI[1], angDelta00);
|
|
angDelta10 = V4MulAdd(angDelta1T1, f0.rbXnI[1], angDelta10);
|
|
angDelta00 = V4MulAdd(angDelta0T2, f0.raXnI[2], angDelta00);
|
|
angDelta10 = V4MulAdd(angDelta1T2, f0.rbXnI[2], angDelta10);
|
|
|
|
// ... and for row f1.
Vec4V angDelta01 = V4Mul(angDelta0T0, f1.raXnI[0]);
|
|
Vec4V angDelta11 = V4Mul(angDelta1T0, f1.rbXnI[0]);
|
|
angDelta01 = V4MulAdd(angDelta0T1, f1.raXnI[1], angDelta01);
|
|
angDelta11 = V4MulAdd(angDelta1T1, f1.rbXnI[1], angDelta11);
|
|
angDelta01 = V4MulAdd(angDelta0T2, f1.raXnI[2], angDelta01);
|
|
angDelta11 = V4MulAdd(angDelta1T2, f1.rbXnI[2], angDelta11);
|
|
|
|
const Vec4V deltaAng0 = V4Sub(angDelta00, angDelta10);
|
|
const Vec4V deltaAng1 = V4Sub(angDelta01, angDelta11);
|
|
|
|
// Same error update as for the normal rows, with no clamping of the resulting error.
const Vec4V deltaBias0 = V4Sub(V4Add(deltaV0, deltaAng0), V4Mul(f0.targetVel, elapsedTime));
|
|
const Vec4V deltaBias1 = V4Sub(V4Add(deltaV1, deltaAng1), V4Mul(f1.targetVel, elapsedTime));
|
|
|
|
const Vec4V error0 = V4Add(f0.error, deltaBias0);
|
|
const Vec4V error1 = V4Add(f1.error, deltaBias1);
|
|
|
|
const Vec4V bias0 = V4Mul(error0, f0.biasCoefficient);
|
|
const Vec4V bias1 = V4Mul(error1, f1.biasCoefficient);
|
|
|
|
// Unclamped candidate impulse per row, built from the previously applied force in two steps.
const Vec4V tmp10 = V4NegMulSub(V4Sub(bias0, f0.targetVel), f0.velMultiplier, appliedForce0);
|
|
const Vec4V tmp11 = V4NegMulSub(V4Sub(bias1, f1.targetVel), f1.velMultiplier, appliedForce1);
|
|
|
|
const Vec4V totalImpulse0 = V4NegMulSub(normalVel0, f0.velMultiplier, tmp10);
|
|
const Vec4V totalImpulse1 = V4NegMulSub(normalVel1, f1.velMultiplier, tmp11);
|
|
|
|
// Magnitude of the combined impulse of the pair.
const Vec4V totalImpulse = V4Sqrt(V4MulAdd(totalImpulse0, totalImpulse0, V4Mul(totalImpulse1, totalImpulse1)));
|
|
|
|
const BoolV clamped = V4IsGrtr(totalImpulse, maxFrictionImpulse);
|
|
|
|
broken = BOr(broken, clamped);
|
|
|
|
// Lanes flagged as broken are limited to the dynamic friction limit; the ratio rescales both rows
// uniformly, and is forced to zero where the magnitude is not greater than zero.
const Vec4V totalClamped = V4Sel(broken, V4Min(totalImpulse, maxDynFrictionImpulse), totalImpulse);
|
|
const Vec4V ratio = V4Sel(V4IsGrtr(totalImpulse, vZero), V4Div(totalClamped, totalImpulse), vZero);
|
|
|
|
const Vec4V newAppliedForce0 = V4Mul(totalImpulse0, ratio);
|
|
const Vec4V newAppliedForce1 = V4Mul(totalImpulse1, ratio);
|
|
|
|
const Vec4V deltaF0 = V4Sub(newAppliedForce0, appliedForce0);
|
|
const Vec4V deltaF1 = V4Sub(newAppliedForce1, appliedForce1);
|
|
|
|
if (residualReportingActive)
|
|
error.accumulateErrorLocalV4(deltaF0, f0.velMultiplier, deltaF1, f1.velMultiplier);
|
|
|
|
frictionAppliedForce[i] = newAppliedForce0;
|
|
frictionAppliedForce[i+1] = newAppliedForce1;
|
|
|
|
// Impulse changes scaled for the linear (inverse mass) and angular (angD0 / angD1) updates of each body.
const Vec4V deltaFIM00 = V4Mul(deltaF0, invMassA);
|
|
const Vec4V deltaFIM10 = V4Mul(deltaF0, invMassB);
|
|
const Vec4V angDetaF00 = V4Mul(deltaF0, angD0);
|
|
const Vec4V angDetaF10 = V4Mul(deltaF0, angD1);
|
|
|
|
const Vec4V deltaFIM01 = V4Mul(deltaF1, invMassA);
|
|
const Vec4V deltaFIM11 = V4Mul(deltaF1, invMassB);
|
|
const Vec4V angDetaF01 = V4Mul(deltaF1, angD0);
|
|
const Vec4V angDetaF11 = V4Mul(deltaF1, angD1);
|
|
|
|
// Apply both rows' impulse changes to all velocity components immediately, so the next pair sees them.
linVel0T0 = V4MulAdd(normalT00, deltaFIM00, V4MulAdd(normalT01, deltaFIM01, linVel0T0));
|
|
linVel1T0 = V4NegMulSub(normalT00, deltaFIM10, V4NegMulSub(normalT01, deltaFIM11, linVel1T0));
|
|
angState0T0 = V4MulAdd(f0.raXnI[0], angDetaF00, V4MulAdd(f1.raXnI[0], angDetaF01, angState0T0));
|
|
angState1T0 = V4NegMulSub(f0.rbXnI[0], angDetaF10, V4NegMulSub(f1.rbXnI[0], angDetaF11, angState1T0));
|
|
|
|
linVel0T1 = V4MulAdd(normalT10, deltaFIM00, V4MulAdd(normalT11, deltaFIM01, linVel0T1));
|
|
linVel1T1 = V4NegMulSub(normalT10, deltaFIM10, V4NegMulSub(normalT11, deltaFIM11, linVel1T1));
|
|
angState0T1 = V4MulAdd(f0.raXnI[1], angDetaF00, V4MulAdd(f1.raXnI[1], angDetaF01, angState0T1));
|
|
angState1T1 = V4NegMulSub(f0.rbXnI[1], angDetaF10, V4NegMulSub(f1.rbXnI[1], angDetaF11, angState1T1));
|
|
|
|
linVel0T2 = V4MulAdd(normalT20, deltaFIM00, V4MulAdd(normalT21, deltaFIM01, linVel0T2));
|
|
linVel1T2 = V4NegMulSub(normalT20, deltaFIM10, V4NegMulSub(normalT21, deltaFIM11, linVel1T2));
|
|
angState0T2 = V4MulAdd(f0.raXnI[2], angDetaF00, V4MulAdd(f1.raXnI[2], angDetaF01, angState0T2));
|
|
angState1T2 = V4NegMulSub(f0.rbXnI[2], angDetaF10, V4NegMulSub(f1.rbXnI[2], angDetaF11, angState1T2));
|
|
}
|
|
// Record the per-lane broken state in the header.
hdr->broken = broken;
|
|
}
|
|
}
|
|
|
|
// Transpose the solved state back into one Vec4V per body.
PX_TRANSPOSE_44(linVel0T0, linVel0T1, linVel0T2, linVel0T3, linVel00, linVel10, linVel20, linVel30);
|
|
PX_TRANSPOSE_44(linVel1T0, linVel1T1, linVel1T2, linVel1T3, linVel01, linVel11, linVel21, linVel31);
|
|
PX_TRANSPOSE_44(angState0T0, angState0T1, angState0T2, angState0T3, angState00, angState10, angState20, angState30);
|
|
PX_TRANSPOSE_44(angState1T0, angState1T1, angState1T2, angState1T3, angState01, angState11, angState21, angState31);
|
|
|
|
// NOTE(review): these asserts run before the stores below, so they check the body state as it was
// on entry rather than the newly solved values; the same checks are repeated after the write-back.
PX_ASSERT(b00.linearVelocity.isFinite());
|
|
PX_ASSERT(b00.angularVelocity.isFinite());
|
|
PX_ASSERT(b10.linearVelocity.isFinite());
|
|
PX_ASSERT(b10.angularVelocity.isFinite());
|
|
PX_ASSERT(b20.linearVelocity.isFinite());
|
|
PX_ASSERT(b20.angularVelocity.isFinite());
|
|
PX_ASSERT(b30.linearVelocity.isFinite());
|
|
PX_ASSERT(b30.angularVelocity.isFinite());
|
|
|
|
PX_ASSERT(b01.linearVelocity.isFinite());
|
|
PX_ASSERT(b01.angularVelocity.isFinite());
|
|
PX_ASSERT(b11.linearVelocity.isFinite());
|
|
PX_ASSERT(b11.angularVelocity.isFinite());
|
|
PX_ASSERT(b21.linearVelocity.isFinite());
|
|
PX_ASSERT(b21.angularVelocity.isFinite());
|
|
PX_ASSERT(b31.linearVelocity.isFinite());
|
|
PX_ASSERT(b31.angularVelocity.isFinite());
|
|
|
|
// Write back
|
|
// Body A of every constraint is always stored.
V4StoreA(linVel00, &b00.linearVelocity.x);
|
|
V4StoreA(angState00, &b00.angularVelocity.x);
|
|
V4StoreA(linVel10, &b10.linearVelocity.x);
|
|
V4StoreA(angState10, &b10.angularVelocity.x);
|
|
V4StoreA(linVel20, &b20.linearVelocity.x);
|
|
V4StoreA(angState20, &b20.angularVelocity.x);
|
|
V4StoreA(linVel30, &b30.linearVelocity.x);
|
|
V4StoreA(angState30, &b30.angularVelocity.x);
|
|
|
|
// Body B is stored only when its bodyBDataIndex is non-zero.
// NOTE(review): presumably index 0 denotes a shared static/world body that must not be modified - confirm.
if (desc[0].bodyBDataIndex != 0)
|
|
{
|
|
V4StoreA(linVel01, &b01.linearVelocity.x);
|
|
V4StoreA(angState01, &b01.angularVelocity.x);
|
|
}
|
|
if (desc[1].bodyBDataIndex != 0)
|
|
{
|
|
V4StoreA(linVel11, &b11.linearVelocity.x);
|
|
V4StoreA(angState11, &b11.angularVelocity.x);
|
|
}
|
|
if (desc[2].bodyBDataIndex != 0)
|
|
{
|
|
V4StoreA(linVel21, &b21.linearVelocity.x);
|
|
V4StoreA(angState21, &b21.angularVelocity.x);
|
|
}
|
|
if (desc[3].bodyBDataIndex != 0)
|
|
{
|
|
V4StoreA(linVel31, &b31.linearVelocity.x);
|
|
V4StoreA(angState31, &b31.angularVelocity.x);
|
|
}
|
|
|
|
// Validate the stored results.
PX_ASSERT(b00.linearVelocity.isFinite());
|
|
PX_ASSERT(b00.angularVelocity.isFinite());
|
|
PX_ASSERT(b10.linearVelocity.isFinite());
|
|
PX_ASSERT(b10.angularVelocity.isFinite());
|
|
PX_ASSERT(b20.linearVelocity.isFinite());
|
|
PX_ASSERT(b20.angularVelocity.isFinite());
|
|
PX_ASSERT(b30.linearVelocity.isFinite());
|
|
PX_ASSERT(b30.angularVelocity.isFinite());
|
|
|
|
PX_ASSERT(b01.linearVelocity.isFinite());
|
|
PX_ASSERT(b01.angularVelocity.isFinite());
|
|
PX_ASSERT(b11.linearVelocity.isFinite());
|
|
PX_ASSERT(b11.angularVelocity.isFinite());
|
|
PX_ASSERT(b21.linearVelocity.isFinite());
|
|
PX_ASSERT(b21.angularVelocity.isFinite());
|
|
PX_ASSERT(b31.linearVelocity.isFinite());
|
|
PX_ASSERT(b31.angularVelocity.isFinite());
|
|
|
|
// Merge the locally accumulated error into the accumulator supplied through the solver context.
if (residualReportingActive)
|
|
error.accumulateErrorGlobal(*cache.contactErrorAccumulator);
|
|
}
|
|
|
|
// VR: used in both PGS and TGS
// Forward declaration only; the definition is not visible in this part of the file.
// Inputs:  two friction axes, each given as three per-component Vec4V values (axis0X/Y/Z, axis1X/Y/Z),
//          and the Vec4V applied force of each of the two axes.
// Outputs: impulse0..impulse3, written through the reference parameters.
// NOTE(review): the block write-back code passes the normals and applied forces of a friction row pair
// and stores impulseN as the 3-vector friction impulse of constraint N in the batch, so this presumably
// recombines the per-lane values into one vector per constraint - confirm against the definition.
|
|
void computeFrictionImpulseBlock(
|
|
const Vec4V& axis0X, const Vec4V& axis0Y, const Vec4V& axis0Z,
|
|
const Vec4V& axis1X, const Vec4V& axis1Y, const Vec4V& axis1Z,
|
|
const Vec4V appliedForce0, const Vec4V appliedForce1,
|
|
Vec4V& impulse0, Vec4V& impulse1, Vec4V& impulse2, Vec4V& impulse3
|
|
);
|
|
|
|
static void writeBackContact4_Block(const PxSolverConstraintDesc* PX_RESTRICT desc, SolverContext* /*cache*/)
|
|
{
|
|
const PxU8* PX_RESTRICT last = desc[0].constraint + getConstraintLength(desc[0]);
|
|
|
|
//hopefully pointer aliasing doesn't bite.
|
|
PxU8* PX_RESTRICT currPtr = desc[0].constraint;
|
|
PxReal* PX_RESTRICT vForceWriteback0 = reinterpret_cast<PxReal*>(desc[0].writeBack);
|
|
PxReal* PX_RESTRICT vForceWriteback1 = reinterpret_cast<PxReal*>(desc[1].writeBack);
|
|
PxReal* PX_RESTRICT vForceWriteback2 = reinterpret_cast<PxReal*>(desc[2].writeBack);
|
|
PxReal* PX_RESTRICT vForceWriteback3 = reinterpret_cast<PxReal*>(desc[3].writeBack);
|
|
PxVec3* PX_RESTRICT vFrictionWriteback0 = reinterpret_cast<PxVec3*>(desc[0].writeBackFriction);
|
|
PxVec3* PX_RESTRICT vFrictionWriteback1 = reinterpret_cast<PxVec3*>(desc[1].writeBackFriction);
|
|
PxVec3* PX_RESTRICT vFrictionWriteback2 = reinterpret_cast<PxVec3*>(desc[2].writeBackFriction);
|
|
PxVec3* PX_RESTRICT vFrictionWriteback3 = reinterpret_cast<PxVec3*>(desc[3].writeBackFriction);
|
|
|
|
//const PxU8 type = *desc[0].constraint;
|
|
const PxU32 contactSize = sizeof(SolverContactPointStepBlock);
|
|
const PxU32 frictionSize = sizeof(SolverContactFrictionStepBlock);
|
|
|
|
Vec4V normalForce = V4Zero();
|
|
|
|
//We'll need this.
|
|
//const Vec4V vZero = V4Zero();
|
|
|
|
bool writeBackThresholds[4] = { false, false, false, false };
|
|
|
|
while ((currPtr < last))
|
|
{
|
|
SolverContactHeaderStepBlock* PX_RESTRICT hdr = reinterpret_cast<SolverContactHeaderStepBlock*>(currPtr);
|
|
|
|
currPtr = reinterpret_cast<PxU8*>(hdr + 1);
|
|
|
|
const PxU32 numNormalConstr = hdr->numNormalConstr;
|
|
const PxU32 numFrictionConstr = hdr->numFrictionConstr;
|
|
|
|
Vec4V* PX_RESTRICT appliedForces = reinterpret_cast<Vec4V*>(currPtr);
|
|
currPtr += sizeof(Vec4V)*numNormalConstr;
|
|
|
|
//SolverContactBatchPointBase4* PX_RESTRICT contacts = (SolverContactBatchPointBase4*)currPtr;
|
|
currPtr += (numNormalConstr * contactSize);
|
|
|
|
const bool hasMaxImpulse = (hdr->flag & SolverContactHeader4::eHAS_MAX_IMPULSE) != 0;
|
|
|
|
if (hasMaxImpulse)
|
|
currPtr += sizeof(Vec4V) * numNormalConstr;
|
|
|
|
Vec4V* frictionAppliedForce = reinterpret_cast<Vec4V*>(currPtr);
|
|
currPtr += sizeof(Vec4V)*numFrictionConstr;
|
|
|
|
SolverContactFrictionStepBlock* PX_RESTRICT frictions = (SolverContactFrictionStepBlock*)currPtr;
|
|
currPtr += (numFrictionConstr * frictionSize);
|
|
|
|
writeBackThresholds[0] = hdr->flags[0] & SolverContactHeader::eHAS_FORCE_THRESHOLDS;
|
|
writeBackThresholds[1] = hdr->flags[1] & SolverContactHeader::eHAS_FORCE_THRESHOLDS;
|
|
writeBackThresholds[2] = hdr->flags[2] & SolverContactHeader::eHAS_FORCE_THRESHOLDS;
|
|
writeBackThresholds[3] = hdr->flags[3] & SolverContactHeader::eHAS_FORCE_THRESHOLDS;
|
|
|
|
for (PxU32 i = 0; i<numNormalConstr; i++)
|
|
{
|
|
//contacts = (SolverContactBatchPointBase4*)(((PxU8*)contacts) + contactSize);
|
|
const FloatV appliedForce0 = V4GetX(appliedForces[i]);
|
|
const FloatV appliedForce1 = V4GetY(appliedForces[i]);
|
|
const FloatV appliedForce2 = V4GetZ(appliedForces[i]);
|
|
const FloatV appliedForce3 = V4GetW(appliedForces[i]);
|
|
|
|
normalForce = V4Add(normalForce, appliedForces[i]);
|
|
|
|
if (vForceWriteback0 && i < hdr->numNormalConstrs[0])
|
|
FStore(appliedForce0, vForceWriteback0++);
|
|
if (vForceWriteback1 && i < hdr->numNormalConstrs[1])
|
|
FStore(appliedForce1, vForceWriteback1++);
|
|
if (vForceWriteback2 && i < hdr->numNormalConstrs[2])
|
|
FStore(appliedForce2, vForceWriteback2++);
|
|
if (vForceWriteback3 && i < hdr->numNormalConstrs[3])
|
|
FStore(appliedForce3, vForceWriteback3++);
|
|
}
|
|
|
|
// Writeback friction impulses
|
|
if (numFrictionConstr)
|
|
{
|
|
//We will have either 4 or 2 frictions (with friction pairs).
|
|
//With torsional friction, we may have 3 (a single friction anchor + twist).
|
|
const PxU32 numFrictionPairs = (numFrictionConstr & 6);
|
|
|
|
for (PxU32 i = 0; i < numFrictionPairs; i += 2)
|
|
{
|
|
SolverContactFrictionStepBlock& f0 = frictions[i + 0];
|
|
SolverContactFrictionStepBlock& f1 = frictions[i + 1];
|
|
|
|
const Vec4V axis0X = f0.normal[0];
|
|
const Vec4V axis0Y = f0.normal[1];
|
|
const Vec4V axis0Z = f0.normal[2];
|
|
|
|
const Vec4V axis1X = f1.normal[0];
|
|
const Vec4V axis1Y = f1.normal[1];
|
|
const Vec4V axis1Z = f1.normal[2];
|
|
|
|
const Vec4V appliedForce0 = frictionAppliedForce[i + 0];
|
|
const Vec4V appliedForce1 = frictionAppliedForce[i + 1];
|
|
|
|
Vec4V impulse0, impulse1, impulse2, impulse3;
|
|
computeFrictionImpulseBlock(axis0X, axis0Y, axis0Z,
|
|
axis1X, axis1Y, axis1Z,
|
|
appliedForce0, appliedForce1,
|
|
impulse0, impulse1, impulse2, impulse3);
|
|
|
|
if (vFrictionWriteback0)
|
|
V3StoreU(Vec3V_From_Vec4V_WUndefined(impulse0), vFrictionWriteback0[i / 2]);
|
|
if (vFrictionWriteback1)
|
|
V3StoreU(Vec3V_From_Vec4V_WUndefined(impulse1), vFrictionWriteback1[i / 2]);
|
|
if (vFrictionWriteback2)
|
|
V3StoreU(Vec3V_From_Vec4V_WUndefined(impulse2), vFrictionWriteback2[i / 2]);
|
|
if (vFrictionWriteback3)
|
|
V3StoreU(Vec3V_From_Vec4V_WUndefined(impulse3), vFrictionWriteback3[i / 2]);
|
|
}
|
|
}
|
|
|
|
if (numFrictionConstr)
|
|
{
|
|
PX_ALIGN(16, PxU32 broken[4]);
|
|
BStoreA(hdr->broken, broken);
|
|
|
|
PxU8* frictionCounts = hdr->numNormalConstrs;
|
|
|
|
for (PxU32 a = 0; a < 4; ++a)
|
|
{
|
|
if (frictionCounts[a] && broken[a])
|
|
*hdr->frictionBrokenWritebackByte[a] = 1; // PT: bad L2 miss here
|
|
}
|
|
}
|
|
}
|
|
|
|
PX_UNUSED(writeBackThresholds);
|
|
|
|
#if 0
|
|
if (cache)
|
|
{
|
|
|
|
PX_ALIGN(16, PxReal nf[4]);
|
|
V4StoreA(normalForce, nf);
|
|
|
|
Sc::ShapeInteraction** shapeInteractions = reinterpret_cast<SolverContactHeader4*>(desc[0].constraint)->shapeInteraction;
|
|
|
|
for (PxU32 a = 0; a < 4; ++a)
|
|
{
|
|
if (writeBackThresholds[a] && desc[a].linkIndexA == PxSolverConstraintDesc::NO_LINK && desc[a].linkIndexB == PxSolverConstraintDesc::NO_LINK &&
|
|
nf[a] != 0.f && (bd0[a]->reportThreshold < PX_MAX_REAL || bd1[a]->reportThreshold < PX_MAX_REAL))
|
|
{
|
|
ThresholdStreamElement elt;
|
|
elt.normalForce = nf[a];
|
|
elt.threshold = PxMin<float>(bd0[a]->reportThreshold, bd1[a]->reportThreshold);
|
|
elt.nodeIndexA = bd0[a]->nodeIndex;
|
|
elt.nodeIndexB = bd1[a]->nodeIndex;
|
|
elt.shapeInteraction = shapeInteractions[a];
|
|
PxOrder(elt.nodeIndexA, elt.nodeIndexB);
|
|
PX_ASSERT(elt.nodeIndexA < elt.nodeIndexB);
|
|
PX_ASSERT(cache.mThresholdStreamIndex < cache.mThresholdStreamLength);
|
|
cache.mThresholdStream[cache.mThresholdStreamIndex++] = elt;
|
|
}
|
|
}
|
|
}
|
|
#endif
|
|
}
|
|
|
|
// Solve-method entry point for a batch of four contact constraints.
// Forwards the descriptors of the batch (starting at hdr.startIndex) to the
// blocked contact solver together with minPenetration, elapsedTime and cache.
void solveContact4(DY_TGS_SOLVE_METHOD_PARAMS)
{
	// txInertias belongs to the shared solve-method signature but is not
	// passed on to the blocked contact solver.
	PX_UNUSED(txInertias);

	// NOTE(review): the second argument is passed as 'true'; its meaning is
	// defined by solveContact4_Block, which is not visible here.
	solveContact4_Block(&desc[hdr.startIndex], true, minPenetration, elapsedTime, cache);
}
|
|
|
|
// Write-back entry point for a batch of four contact constraints.
// Hands the batch's first descriptor (at hdr.startIndex) and the solver
// context to the blocked write-back routine.
void writeBackContact4(DY_TGS_WRITEBACK_METHOD_PARAMS)
{
	writeBackContact4_Block(&desc[hdr.startIndex], cache);
}
|
|
|
|
// Lane-wise 3D dot product of two vectors stored in structure-of-arrays form:
// returns x0*x1 + y0*y1 + z0*z1 evaluated independently in each lane.
// The accumulation order is z, then y, then x.
static PX_FORCE_INLINE Vec4V V4Dot3(const Vec4V& x0, const Vec4V& y0, const Vec4V& z0, const Vec4V& x1, const Vec4V& y1, const Vec4V& z1)
{
	const Vec4V zTerm = V4Mul(z0, z1);
	const Vec4V yzSum = V4MulAdd(y0, y1, zTerm);
	return V4MulAdd(x0, x1, yzSum);
}
|
|
|
|
static void solve1DStep4(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxTGSSolverBodyTxInertia* const txInertias, PxReal elapsedTimeF32, const SolverContext& cache)
|
|
{
|
|
PxU8* PX_RESTRICT bPtr = desc->constraint;
|
|
if (bPtr == NULL)
|
|
return;
|
|
|
|
const FloatV elapsedTime = FLoad(elapsedTimeF32);
|
|
|
|
PxTGSSolverBodyVel& b00 = *desc[0].tgsBodyA;
|
|
PxTGSSolverBodyVel& b01 = *desc[0].tgsBodyB;
|
|
PxTGSSolverBodyVel& b10 = *desc[1].tgsBodyA;
|
|
PxTGSSolverBodyVel& b11 = *desc[1].tgsBodyB;
|
|
PxTGSSolverBodyVel& b20 = *desc[2].tgsBodyA;
|
|
PxTGSSolverBodyVel& b21 = *desc[2].tgsBodyB;
|
|
PxTGSSolverBodyVel& b30 = *desc[3].tgsBodyA;
|
|
PxTGSSolverBodyVel& b31 = *desc[3].tgsBodyB;
|
|
|
|
const PxTGSSolverBodyTxInertia& txI00 = txInertias[desc[0].bodyADataIndex];
|
|
const PxTGSSolverBodyTxInertia& txI01 = txInertias[desc[0].bodyBDataIndex];
|
|
const PxTGSSolverBodyTxInertia& txI10 = txInertias[desc[1].bodyADataIndex];
|
|
const PxTGSSolverBodyTxInertia& txI11 = txInertias[desc[1].bodyBDataIndex];
|
|
const PxTGSSolverBodyTxInertia& txI20 = txInertias[desc[2].bodyADataIndex];
|
|
const PxTGSSolverBodyTxInertia& txI21 = txInertias[desc[2].bodyBDataIndex];
|
|
const PxTGSSolverBodyTxInertia& txI30 = txInertias[desc[3].bodyADataIndex];
|
|
const PxTGSSolverBodyTxInertia& txI31 = txInertias[desc[3].bodyBDataIndex];
|
|
|
|
Vec4V linVel00 = V4LoadA(&b00.linearVelocity.x);
|
|
Vec4V linVel01 = V4LoadA(&b01.linearVelocity.x);
|
|
Vec4V angState00 = V4LoadA(&b00.angularVelocity.x);
|
|
Vec4V angState01 = V4LoadA(&b01.angularVelocity.x);
|
|
|
|
Vec4V linVel10 = V4LoadA(&b10.linearVelocity.x);
|
|
Vec4V linVel11 = V4LoadA(&b11.linearVelocity.x);
|
|
Vec4V angState10 = V4LoadA(&b10.angularVelocity.x);
|
|
Vec4V angState11 = V4LoadA(&b11.angularVelocity.x);
|
|
|
|
Vec4V linVel20 = V4LoadA(&b20.linearVelocity.x);
|
|
Vec4V linVel21 = V4LoadA(&b21.linearVelocity.x);
|
|
Vec4V angState20 = V4LoadA(&b20.angularVelocity.x);
|
|
Vec4V angState21 = V4LoadA(&b21.angularVelocity.x);
|
|
|
|
Vec4V linVel30 = V4LoadA(&b30.linearVelocity.x);
|
|
Vec4V linVel31 = V4LoadA(&b31.linearVelocity.x);
|
|
Vec4V angState30 = V4LoadA(&b30.angularVelocity.x);
|
|
Vec4V angState31 = V4LoadA(&b31.angularVelocity.x);
|
|
|
|
Vec4V linVel0T0, linVel0T1, linVel0T2, linVel0T3;
|
|
Vec4V linVel1T0, linVel1T1, linVel1T2, linVel1T3;
|
|
Vec4V angState0T0, angState0T1, angState0T2, angState0T3;
|
|
Vec4V angState1T0, angState1T1, angState1T2, angState1T3;
|
|
|
|
PX_TRANSPOSE_44(linVel00, linVel10, linVel20, linVel30, linVel0T0, linVel0T1, linVel0T2, linVel0T3);
|
|
PX_TRANSPOSE_44(linVel01, linVel11, linVel21, linVel31, linVel1T0, linVel1T1, linVel1T2, linVel1T3);
|
|
PX_TRANSPOSE_44(angState00, angState10, angState20, angState30, angState0T0, angState0T1, angState0T2, angState0T3);
|
|
PX_TRANSPOSE_44(angState01, angState11, angState21, angState31, angState1T0, angState1T1, angState1T2, angState1T3);
|
|
|
|
Vec4V linDelta00 = V4LoadA(&b00.deltaLinDt.x);
|
|
Vec4V linDelta01 = V4LoadA(&b01.deltaLinDt.x);
|
|
Vec4V angDelta00 = V4LoadA(&b00.deltaAngDt.x);
|
|
Vec4V angDelta01 = V4LoadA(&b01.deltaAngDt.x);
|
|
|
|
Vec4V linDelta10 = V4LoadA(&b10.deltaLinDt.x);
|
|
Vec4V linDelta11 = V4LoadA(&b11.deltaLinDt.x);
|
|
Vec4V angDelta10 = V4LoadA(&b10.deltaAngDt.x);
|
|
Vec4V angDelta11 = V4LoadA(&b11.deltaAngDt.x);
|
|
|
|
Vec4V linDelta20 = V4LoadA(&b20.deltaLinDt.x);
|
|
Vec4V linDelta21 = V4LoadA(&b21.deltaLinDt.x);
|
|
Vec4V angDelta20 = V4LoadA(&b20.deltaAngDt.x);
|
|
Vec4V angDelta21 = V4LoadA(&b21.deltaAngDt.x);
|
|
|
|
Vec4V linDelta30 = V4LoadA(&b30.deltaLinDt.x);
|
|
Vec4V linDelta31 = V4LoadA(&b31.deltaLinDt.x);
|
|
Vec4V angDelta30 = V4LoadA(&b30.deltaAngDt.x);
|
|
Vec4V angDelta31 = V4LoadA(&b31.deltaAngDt.x);
|
|
|
|
Vec4V linDelta0T0, linDelta0T1, linDelta0T2;
|
|
Vec4V linDelta1T0, linDelta1T1, linDelta1T2;
|
|
Vec4V angDelta0T0, angDelta0T1, angDelta0T2;
|
|
Vec4V angDelta1T0, angDelta1T1, angDelta1T2;
|
|
|
|
PX_TRANSPOSE_44_34(linDelta00, linDelta10, linDelta20, linDelta30, linDelta0T0, linDelta0T1, linDelta0T2);
|
|
PX_TRANSPOSE_44_34(linDelta01, linDelta11, linDelta21, linDelta31, linDelta1T0, linDelta1T1, linDelta1T2);
|
|
PX_TRANSPOSE_44_34(angDelta00, angDelta10, angDelta20, angDelta30, angDelta0T0, angDelta0T1, angDelta0T2);
|
|
PX_TRANSPOSE_44_34(angDelta01, angDelta11, angDelta21, angDelta31, angDelta1T0, angDelta1T1, angDelta1T2);
|
|
|
|
const SolverConstraint1DHeaderStep4* PX_RESTRICT header = reinterpret_cast<const SolverConstraint1DHeaderStep4*>(bPtr);
|
|
PxU8* PX_RESTRICT base = reinterpret_cast<PxU8*>(bPtr + sizeof(SolverConstraint1DHeaderStep4));
|
|
|
|
Vec4V invInertia00X = V4LoadU(&txI00.sqrtInvInertia.column0.x); // PT: safe because 'column1' follows 'column0' in PxMat33
|
|
Vec4V invInertia00Y = V4LoadU(&txI00.sqrtInvInertia.column1.x); // PT: safe because 'column2' follows 'column1' in PxMat33
|
|
Vec4V invInertia00Z = Vec4V_From_Vec3V(V3LoadU(txI00.sqrtInvInertia.column2));
|
|
|
|
Vec4V invInertia10X = V4LoadU(&txI10.sqrtInvInertia.column0.x); // PT: safe because 'column1' follows 'column0' in PxMat33
|
|
Vec4V invInertia10Y = V4LoadU(&txI10.sqrtInvInertia.column1.x); // PT: safe because 'column2' follows 'column1' in PxMat33
|
|
Vec4V invInertia10Z = Vec4V_From_Vec3V(V3LoadU(txI10.sqrtInvInertia.column2));
|
|
|
|
Vec4V invInertia20X = V4LoadU(&txI20.sqrtInvInertia.column0.x); // PT: safe because 'column1' follows 'column0' in PxMat33
|
|
Vec4V invInertia20Y = V4LoadU(&txI20.sqrtInvInertia.column1.x); // PT: safe because 'column2' follows 'column1' in PxMat33
|
|
Vec4V invInertia20Z = Vec4V_From_Vec3V(V3LoadU(txI20.sqrtInvInertia.column2));
|
|
|
|
Vec4V invInertia30X = V4LoadU(&txI30.sqrtInvInertia.column0.x); // PT: safe because 'column1' follows 'column0' in PxMat33
|
|
Vec4V invInertia30Y = V4LoadU(&txI30.sqrtInvInertia.column1.x); // PT: safe because 'column2' follows 'column1' in PxMat33
|
|
Vec4V invInertia30Z = Vec4V_From_Vec3V(V3LoadU(txI30.sqrtInvInertia.column2));
|
|
|
|
Vec4V invInertia01X = V4LoadU(&txI01.sqrtInvInertia.column0.x); // PT: safe because 'column1' follows 'column0' in PxMat33
|
|
Vec4V invInertia01Y = V4LoadU(&txI01.sqrtInvInertia.column1.x); // PT: safe because 'column2' follows 'column1' in PxMat33
|
|
Vec4V invInertia01Z = Vec4V_From_Vec3V(V3LoadU(txI01.sqrtInvInertia.column2));
|
|
|
|
Vec4V invInertia11X = V4LoadU(&txI11.sqrtInvInertia.column0.x); // PT: safe because 'column1' follows 'column0' in PxMat33
|
|
Vec4V invInertia11Y = V4LoadU(&txI11.sqrtInvInertia.column1.x); // PT: safe because 'column2' follows 'column1' in PxMat33
|
|
Vec4V invInertia11Z = Vec4V_From_Vec3V(V3LoadU(txI11.sqrtInvInertia.column2));
|
|
|
|
Vec4V invInertia21X = V4LoadU(&txI21.sqrtInvInertia.column0.x); // PT: safe because 'column1' follows 'column0' in PxMat33
|
|
Vec4V invInertia21Y = V4LoadU(&txI21.sqrtInvInertia.column1.x); // PT: safe because 'column2' follows 'column1' in PxMat33
|
|
Vec4V invInertia21Z = Vec4V_From_Vec3V(V3LoadU(txI21.sqrtInvInertia.column2));
|
|
|
|
Vec4V invInertia31X = V4LoadU(&txI31.sqrtInvInertia.column0.x); // PT: safe because 'column1' follows 'column0' in PxMat33
|
|
Vec4V invInertia31Y = V4LoadU(&txI31.sqrtInvInertia.column1.x); // PT: safe because 'column2' follows 'column1' in PxMat33
|
|
Vec4V invInertia31Z = Vec4V_From_Vec3V(V3LoadU(txI31.sqrtInvInertia.column2));
|
|
|
|
Vec4V invInertia0X0, invInertia0X1, invInertia0X2;
|
|
Vec4V invInertia0Y0, invInertia0Y1, invInertia0Y2;
|
|
Vec4V invInertia0Z0, invInertia0Z1, invInertia0Z2;
|
|
|
|
Vec4V invInertia1X0, invInertia1X1, invInertia1X2;
|
|
Vec4V invInertia1Y0, invInertia1Y1, invInertia1Y2;
|
|
Vec4V invInertia1Z0, invInertia1Z1, invInertia1Z2;
|
|
|
|
PX_TRANSPOSE_44_34(invInertia00X, invInertia10X, invInertia20X, invInertia30X, invInertia0X0, invInertia0Y0, invInertia0Z0);
|
|
PX_TRANSPOSE_44_34(invInertia00Y, invInertia10Y, invInertia20Y, invInertia30Y, invInertia0X1, invInertia0Y1, invInertia0Z1);
|
|
PX_TRANSPOSE_44_34(invInertia00Z, invInertia10Z, invInertia20Z, invInertia30Z, invInertia0X2, invInertia0Y2, invInertia0Z2);
|
|
|
|
PX_TRANSPOSE_44_34(invInertia01X, invInertia11X, invInertia21X, invInertia31X, invInertia1X0, invInertia1Y0, invInertia1Z0);
|
|
PX_TRANSPOSE_44_34(invInertia01Y, invInertia11Y, invInertia21Y, invInertia31Y, invInertia1X1, invInertia1Y1, invInertia1Z1);
|
|
PX_TRANSPOSE_44_34(invInertia01Z, invInertia11Z, invInertia21Z, invInertia31Z, invInertia1X2, invInertia1Y2, invInertia1Z2);
|
|
|
|
const Vec4V invInertiaScale0 = header->angD0;
|
|
const Vec4V invInertiaScale1 = header->angD1;
|
|
|
|
//KS - todo - load this a bit quicker...
|
|
Vec4V rot00 = V4LoadA(&txI00.deltaBody2WorldQ.x);
|
|
Vec4V rot01 = V4LoadA(&txI01.deltaBody2WorldQ.x);
|
|
Vec4V rot10 = V4LoadA(&txI10.deltaBody2WorldQ.x);
|
|
Vec4V rot11 = V4LoadA(&txI11.deltaBody2WorldQ.x);
|
|
Vec4V rot20 = V4LoadA(&txI20.deltaBody2WorldQ.x);
|
|
Vec4V rot21 = V4LoadA(&txI21.deltaBody2WorldQ.x);
|
|
Vec4V rot30 = V4LoadA(&txI30.deltaBody2WorldQ.x);
|
|
Vec4V rot31 = V4LoadA(&txI31.deltaBody2WorldQ.x);
|
|
|
|
Vec4V rot0X, rot0Y, rot0Z, rot0W;
|
|
Vec4V rot1X, rot1Y, rot1Z, rot1W;
|
|
|
|
PX_TRANSPOSE_44(rot00, rot10, rot20, rot30, rot0X, rot0Y, rot0Z, rot0W);
|
|
PX_TRANSPOSE_44(rot01, rot11, rot21, rot31, rot1X, rot1Y, rot1Z, rot1W);
|
|
|
|
Vec4V raX, raY, raZ;
|
|
Vec4V rbX, rbY, rbZ;
|
|
|
|
QuatRotate4(rot0X, rot0Y, rot0Z, rot0W, header->rAWorld[0], header->rAWorld[1], header->rAWorld[2], raX, raY, raZ);
|
|
QuatRotate4(rot1X, rot1Y, rot1Z, rot1W, header->rBWorld[0], header->rBWorld[1], header->rBWorld[2], rbX, rbY, rbZ);
|
|
|
|
const Vec4V raMotionX = V4Sub(V4Add(raX, linDelta0T0), header->rAWorld[0]);
|
|
const Vec4V raMotionY = V4Sub(V4Add(raY, linDelta0T1), header->rAWorld[1]);
|
|
const Vec4V raMotionZ = V4Sub(V4Add(raZ, linDelta0T2), header->rAWorld[2]);
|
|
const Vec4V rbMotionX = V4Sub(V4Add(rbX, linDelta1T0), header->rBWorld[0]);
|
|
const Vec4V rbMotionY = V4Sub(V4Add(rbY, linDelta1T1), header->rBWorld[1]);
|
|
const Vec4V rbMotionZ = V4Sub(V4Add(rbZ, linDelta1T2), header->rBWorld[2]);
|
|
|
|
const Vec4V mass0 = header->invMass0D0;
|
|
const Vec4V mass1 = header->invMass1D1;
|
|
|
|
const VecU32V orthoMask = U4Load(DY_SC_FLAG_ORTHO_TARGET);
|
|
const VecU32V limitMask = U4Load(DY_SC_FLAG_INEQUALITY);
|
|
const VecU32V springFlagMask = U4Load(DY_SC_FLAG_SPRING);
|
|
const Vec4V zero = V4Zero();
|
|
const Vec4V one = V4One();
|
|
|
|
Vec4V error0 = V4Add(header->angOrthoError[0],
|
|
V4Sub(V4Dot3(header->angOrthoAxis0X[0], header->angOrthoAxis0Y[0], header->angOrthoAxis0Z[0], angDelta0T0, angDelta0T1, angDelta0T2),
|
|
V4Dot3(header->angOrthoAxis1X[0], header->angOrthoAxis1Y[0], header->angOrthoAxis1Z[0], angDelta1T0, angDelta1T1, angDelta1T2)));
|
|
|
|
Vec4V error1 = V4Add(header->angOrthoError[1],
|
|
V4Sub(V4Dot3(header->angOrthoAxis0X[1], header->angOrthoAxis0Y[1], header->angOrthoAxis0Z[1], angDelta0T0, angDelta0T1, angDelta0T2),
|
|
V4Dot3(header->angOrthoAxis1X[1], header->angOrthoAxis1Y[1], header->angOrthoAxis1Z[1], angDelta1T0, angDelta1T1, angDelta1T2)));
|
|
|
|
Vec4V error2 = V4Add(header->angOrthoError[2],
|
|
V4Sub(V4Dot3(header->angOrthoAxis0X[2], header->angOrthoAxis0Y[2], header->angOrthoAxis0Z[2], angDelta0T0, angDelta0T1, angDelta0T2),
|
|
V4Dot3(header->angOrthoAxis1X[2], header->angOrthoAxis1Y[2], header->angOrthoAxis1Z[2], angDelta1T0, angDelta1T1, angDelta1T2)));
|
|
|
|
PxU32 stride = cache.contactErrorAccumulator ? sizeof(SolverConstraint1DStep4WithResidual) : sizeof(SolverConstraint1DStep4);
|
|
|
|
const PxU32 count = header->count;
|
|
for (PxU32 i = 0; i<count; ++i/*, base++*/)
|
|
{
|
|
PxPrefetchLine(base + stride);
|
|
SolverConstraint1DStep4& c = reinterpret_cast<SolverConstraint1DStep4&>(*base);
|
|
|
|
const Vec4V cangVel0X = V4Add(c.ang0[0], V4NegMulSub(raZ, c.lin0[1], V4Mul(raY, c.lin0[2])));
|
|
const Vec4V cangVel0Y = V4Add(c.ang0[1], V4NegMulSub(raX, c.lin0[2], V4Mul(raZ, c.lin0[0])));
|
|
const Vec4V cangVel0Z = V4Add(c.ang0[2], V4NegMulSub(raY, c.lin0[0], V4Mul(raX, c.lin0[1])));
|
|
|
|
const Vec4V cangVel1X = V4Add(c.ang1[0], V4NegMulSub(rbZ, c.lin1[1], V4Mul(rbY, c.lin1[2])));
|
|
const Vec4V cangVel1Y = V4Add(c.ang1[1], V4NegMulSub(rbX, c.lin1[2], V4Mul(rbZ, c.lin1[0])));
|
|
const Vec4V cangVel1Z = V4Add(c.ang1[2], V4NegMulSub(rbY, c.lin1[0], V4Mul(rbX, c.lin1[1])));
|
|
|
|
const VecU32V flags = U4LoadA(c.flags);
|
|
|
|
const BoolV useOrtho = V4IsEqU32(V4U32and(flags, orthoMask), orthoMask);
|
|
|
|
const Vec4V angOrthoCoefficient = V4Sel(useOrtho, one, zero);
|
|
|
|
Vec4V delAngVel0X = V4Mul(invInertia0X0, cangVel0X);
|
|
Vec4V delAngVel0Y = V4Mul(invInertia0X1, cangVel0X);
|
|
Vec4V delAngVel0Z = V4Mul(invInertia0X2, cangVel0X);
|
|
|
|
delAngVel0X = V4MulAdd(invInertia0Y0, cangVel0Y, delAngVel0X);
|
|
delAngVel0Y = V4MulAdd(invInertia0Y1, cangVel0Y, delAngVel0Y);
|
|
delAngVel0Z = V4MulAdd(invInertia0Y2, cangVel0Y, delAngVel0Z);
|
|
|
|
delAngVel0X = V4MulAdd(invInertia0Z0, cangVel0Z, delAngVel0X);
|
|
delAngVel0Y = V4MulAdd(invInertia0Z1, cangVel0Z, delAngVel0Y);
|
|
delAngVel0Z = V4MulAdd(invInertia0Z2, cangVel0Z, delAngVel0Z);
|
|
|
|
Vec4V delAngVel1X = V4Mul(invInertia1X0, cangVel1X);
|
|
Vec4V delAngVel1Y = V4Mul(invInertia1X1, cangVel1X);
|
|
Vec4V delAngVel1Z = V4Mul(invInertia1X2, cangVel1X);
|
|
|
|
delAngVel1X = V4MulAdd(invInertia1Y0, cangVel1Y, delAngVel1X);
|
|
delAngVel1Y = V4MulAdd(invInertia1Y1, cangVel1Y, delAngVel1Y);
|
|
delAngVel1Z = V4MulAdd(invInertia1Y2, cangVel1Y, delAngVel1Z);
|
|
|
|
delAngVel1X = V4MulAdd(invInertia1Z0, cangVel1Z, delAngVel1X);
|
|
delAngVel1Y = V4MulAdd(invInertia1Z1, cangVel1Z, delAngVel1Y);
|
|
delAngVel1Z = V4MulAdd(invInertia1Z2, cangVel1Z, delAngVel1Z);
|
|
|
|
Vec4V err = c.error;
|
|
{
|
|
const Vec4V proj0 = V4Mul(V4MulAdd(header->angOrthoAxis0X[0], delAngVel0X, V4MulAdd(header->angOrthoAxis0Y[0], delAngVel0Y,
|
|
V4MulAdd(header->angOrthoAxis0Z[0], delAngVel0Z, V4MulAdd(header->angOrthoAxis1X[0], delAngVel1X,
|
|
V4MulAdd(header->angOrthoAxis1Y[0], delAngVel1Y, V4Mul(header->angOrthoAxis1Z[0], delAngVel1Z)))))), header->angOrthoRecipResponse[0]);
|
|
|
|
const Vec4V proj1 = V4Mul(V4MulAdd(header->angOrthoAxis0X[1], delAngVel0X, V4MulAdd(header->angOrthoAxis0Y[1], delAngVel0Y,
|
|
V4MulAdd(header->angOrthoAxis0Z[1], delAngVel0Z, V4MulAdd(header->angOrthoAxis1X[1], delAngVel1X,
|
|
V4MulAdd(header->angOrthoAxis1Y[1], delAngVel1Y, V4Mul(header->angOrthoAxis1Z[1], delAngVel1Z)))))), header->angOrthoRecipResponse[1]);
|
|
|
|
const Vec4V proj2 = V4Mul(V4MulAdd(header->angOrthoAxis0X[2], delAngVel0X, V4MulAdd(header->angOrthoAxis0Y[2], delAngVel0Y,
|
|
V4MulAdd(header->angOrthoAxis0Z[2], delAngVel0Z, V4MulAdd(header->angOrthoAxis1X[2], delAngVel1X,
|
|
V4MulAdd(header->angOrthoAxis1Y[2], delAngVel1Y, V4Mul(header->angOrthoAxis1Z[2], delAngVel1Z)))))), header->angOrthoRecipResponse[2]);
|
|
|
|
const Vec4V delta0X = V4MulAdd(header->angOrthoAxis0X[0], proj0, V4MulAdd(header->angOrthoAxis0X[1], proj1, V4Mul(header->angOrthoAxis0X[2], proj2)));
|
|
const Vec4V delta0Y = V4MulAdd(header->angOrthoAxis0Y[0], proj0, V4MulAdd(header->angOrthoAxis0Y[1], proj1, V4Mul(header->angOrthoAxis0Y[2], proj2)));
|
|
const Vec4V delta0Z = V4MulAdd(header->angOrthoAxis0Z[0], proj0, V4MulAdd(header->angOrthoAxis0Z[1], proj1, V4Mul(header->angOrthoAxis0Z[2], proj2)));
|
|
|
|
const Vec4V delta1X = V4MulAdd(header->angOrthoAxis1X[0], proj0, V4MulAdd(header->angOrthoAxis1X[1], proj1, V4Mul(header->angOrthoAxis1X[2], proj2)));
|
|
const Vec4V delta1Y = V4MulAdd(header->angOrthoAxis1Y[0], proj0, V4MulAdd(header->angOrthoAxis1Y[1], proj1, V4Mul(header->angOrthoAxis1Y[2], proj2)));
|
|
const Vec4V delta1Z = V4MulAdd(header->angOrthoAxis1Z[0], proj0, V4MulAdd(header->angOrthoAxis1Z[1], proj1, V4Mul(header->angOrthoAxis1Z[2], proj2)));
|
|
|
|
delAngVel0X = V4NegMulSub(delta0X, angOrthoCoefficient, delAngVel0X);
|
|
delAngVel0Y = V4NegMulSub(delta0Y, angOrthoCoefficient, delAngVel0Y);
|
|
delAngVel0Z = V4NegMulSub(delta0Z, angOrthoCoefficient, delAngVel0Z);
|
|
|
|
delAngVel1X = V4NegMulSub(delta1X, angOrthoCoefficient, delAngVel1X);
|
|
delAngVel1Y = V4NegMulSub(delta1Y, angOrthoCoefficient, delAngVel1Y);
|
|
delAngVel1Z = V4NegMulSub(delta1Z, angOrthoCoefficient, delAngVel1Z);
|
|
|
|
const Vec4V orthoBasisError = V4Mul(c.biasScale, V4MulAdd(error0, proj0, V4MulAdd(error1, proj1, V4Mul(error2, proj2))));
|
|
err = V4Sub(err, V4Mul(orthoBasisError, angOrthoCoefficient));
|
|
}
|
|
|
|
Vec4V ang0IX = V4Mul(invInertia0X0, delAngVel0X);
|
|
Vec4V ang0IY = V4Mul(invInertia0X1, delAngVel0X);
|
|
Vec4V ang0IZ = V4Mul(invInertia0X2, delAngVel0X);
|
|
|
|
ang0IX = V4MulAdd(invInertia0Y0, delAngVel0Y, ang0IX);
|
|
ang0IY = V4MulAdd(invInertia0Y1, delAngVel0Y, ang0IY);
|
|
ang0IZ = V4MulAdd(invInertia0Y2, delAngVel0Y, ang0IZ);
|
|
|
|
ang0IX = V4MulAdd(invInertia0Z0, delAngVel0Z, ang0IX);
|
|
ang0IY = V4MulAdd(invInertia0Z1, delAngVel0Z, ang0IY);
|
|
ang0IZ = V4MulAdd(invInertia0Z2, delAngVel0Z, ang0IZ);
|
|
|
|
Vec4V ang1IX = V4Mul(invInertia1X0, delAngVel1X);
|
|
Vec4V ang1IY = V4Mul(invInertia1X1, delAngVel1X);
|
|
Vec4V ang1IZ = V4Mul(invInertia1X2, delAngVel1X);
|
|
|
|
ang1IX = V4MulAdd(invInertia1Y0, delAngVel1Y, ang1IX);
|
|
ang1IY = V4MulAdd(invInertia1Y1, delAngVel1Y, ang1IY);
|
|
ang1IZ = V4MulAdd(invInertia1Y2, delAngVel1Y, ang1IZ);
|
|
|
|
ang1IX = V4MulAdd(invInertia1Z0, delAngVel1Z, ang1IX);
|
|
ang1IY = V4MulAdd(invInertia1Z1, delAngVel1Z, ang1IY);
|
|
ang1IZ = V4MulAdd(invInertia1Z2, delAngVel1Z, ang1IZ);
|
|
|
|
const Vec4V clinVel0X = c.lin0[0];
|
|
const Vec4V clinVel0Y = c.lin0[1];
|
|
const Vec4V clinVel0Z = c.lin0[2];
|
|
|
|
const Vec4V clinVel1X = c.lin1[0];
|
|
const Vec4V clinVel1Y = c.lin1[1];
|
|
const Vec4V clinVel1Z = c.lin1[2];
|
|
|
|
const Vec4V clinVel0X_ = c.lin0[0];
|
|
const Vec4V clinVel0Y_ = c.lin0[1];
|
|
const Vec4V clinVel0Z_ = c.lin0[2];
|
|
|
|
const Vec4V clinVel1X_ = c.lin1[0];
|
|
const Vec4V clinVel1Y_ = c.lin1[1];
|
|
const Vec4V clinVel1Z_ = c.lin1[2];
|
|
|
|
const aos::BoolV isSpringConstraint = V4IsEqU32(V4U32and(flags, springFlagMask), springFlagMask);
|
|
|
|
const Vec4V errorChange = computeResolvedGeometricErrorTGSBlock(
|
|
raMotionX, raMotionY, raMotionZ,
|
|
rbMotionX, rbMotionY, rbMotionZ,
|
|
clinVel0X_, clinVel0Y_, clinVel0Z_,
|
|
clinVel1X_, clinVel1Y_, clinVel1Z_,
|
|
angDelta0T0, angDelta0T1, angDelta0T2,
|
|
angDelta1T0, angDelta1T1, angDelta1T2,
|
|
delAngVel0X, delAngVel0Y, delAngVel0Z,
|
|
delAngVel1X, delAngVel1Y, delAngVel1Z,
|
|
c.angularErrorScale,
|
|
isSpringConstraint, c.velTarget, elapsedTime);
|
|
|
|
//KS - compute raXnI and effective mass. Unfortunately, the joints are noticeably less stable if we don't do this each
|
|
//iteration. It's skippable. If we do that, there's no need for the invInertiaTensors
|
|
|
|
const Vec4V dotDelAngVel0 = V4MulAdd(delAngVel0X, delAngVel0X, V4MulAdd(delAngVel0Y, delAngVel0Y, V4Mul(delAngVel0Z, delAngVel0Z)));
|
|
const Vec4V dotDelAngVel1 = V4MulAdd(delAngVel1X, delAngVel1X, V4MulAdd(delAngVel1Y, delAngVel1Y, V4Mul(delAngVel1Z, delAngVel1Z)));
|
|
|
|
const Vec4V dotClinVel0 = V4MulAdd(clinVel0X, clinVel0X, V4MulAdd(clinVel0Y, clinVel0Y, V4Mul(clinVel0Z, clinVel0Z)));
|
|
const Vec4V dotClinVel1 = V4MulAdd(clinVel1X, clinVel1X, V4MulAdd(clinVel1Y, clinVel1Y, V4Mul(clinVel1Z, clinVel1Z)));
|
|
|
|
const Vec4V resp0 = V4MulAdd(mass0, dotClinVel0, V4Mul(invInertiaScale0, dotDelAngVel0));
|
|
const Vec4V resp1 = V4MulAdd(mass1, dotClinVel1, V4Mul(invInertiaScale1, dotDelAngVel1));
|
|
const Vec4V response = V4Add(resp0, resp1);
|
|
const Vec4V recipResponse = V4Sel(V4IsGrtr(response, V4Zero()), V4Recip(response), V4Zero());
|
|
|
|
const Vec4V vMul = V4Sel(isSpringConstraint, c.velMultiplier, V4Mul(recipResponse, c.velMultiplier));
|
|
|
|
const Vec4V minBias = computeMinBiasTGSBlock(flags, limitMask, c.maxBias);
|
|
const Vec4V unclampedBias = V4MulAdd(errorChange, c.biasScale, err);
|
|
const Vec4V bias = V4Clamp(unclampedBias, minBias, c.maxBias);
|
|
|
|
const Vec4V constant = V4Sel(isSpringConstraint, V4Add(bias, c.velTarget), V4Mul(recipResponse, V4Add(bias, c.velTarget)));
|
|
|
|
const Vec4V normalVel0 = V4MulAdd(clinVel0X_, linVel0T0, V4MulAdd(clinVel0Y_, linVel0T1, V4Mul(clinVel0Z_, linVel0T2)));
|
|
const Vec4V normalVel1 = V4MulAdd(clinVel1X_, linVel1T0, V4MulAdd(clinVel1Y_, linVel1T1, V4Mul(clinVel1Z_, linVel1T2)));
|
|
|
|
const Vec4V angVel0 = V4MulAdd(delAngVel0X, angState0T0, V4MulAdd(delAngVel0Y, angState0T1, V4Mul(delAngVel0Z, angState0T2)));
|
|
const Vec4V angVel1 = V4MulAdd(delAngVel1X, angState1T0, V4MulAdd(delAngVel1Y, angState1T1, V4Mul(delAngVel1Z, angState1T2)));
|
|
|
|
const Vec4V normalVel = V4Add(V4Sub(normalVel0, normalVel1), V4Sub(angVel0, angVel1));
|
|
|
|
const Vec4V unclampedForce = V4Add(c.appliedForce, V4MulAdd(vMul, normalVel, constant));
|
|
const Vec4V clampedForce = V4Clamp(unclampedForce, c.minImpulse, c.maxImpulse);
|
|
const Vec4V deltaF = V4Sub(clampedForce, c.appliedForce);
|
|
|
|
c.appliedForce = clampedForce;
|
|
if (cache.contactErrorAccumulator)
|
|
{
|
|
SolverConstraint1DStep4WithResidual& cc = static_cast<SolverConstraint1DStep4WithResidual&>(c);
|
|
const Vec4V residual = Dy::calculateResidualV4(deltaF, vMul);
|
|
if (cache.isPositionIteration)
|
|
cc.residualPosIter = residual;
|
|
else
|
|
cc.residualVelIter = residual;
|
|
}
|
|
|
|
const Vec4V deltaFIM0 = V4Mul(deltaF, mass0);
|
|
const Vec4V deltaFIM1 = V4Mul(deltaF, mass1);
|
|
const Vec4V angDetaF0 = V4Mul(deltaF, invInertiaScale0);
|
|
const Vec4V angDetaF1 = V4Mul(deltaF, invInertiaScale1);
|
|
|
|
linVel0T0 = V4MulAdd(clinVel0X_, deltaFIM0, linVel0T0);
|
|
linVel1T0 = V4NegMulSub(clinVel1X_, deltaFIM1, linVel1T0);
|
|
angState0T0 = V4MulAdd(delAngVel0X, angDetaF0, angState0T0);
|
|
angState1T0 = V4NegMulSub(delAngVel1X, angDetaF1, angState1T0);
|
|
|
|
linVel0T1 = V4MulAdd(clinVel0Y_, deltaFIM0, linVel0T1);
|
|
linVel1T1 = V4NegMulSub(clinVel1Y_, deltaFIM1, linVel1T1);
|
|
angState0T1 = V4MulAdd(delAngVel0Y, angDetaF0, angState0T1);
|
|
angState1T1 = V4NegMulSub(delAngVel1Y, angDetaF1, angState1T1);
|
|
|
|
linVel0T2 = V4MulAdd(clinVel0Z_, deltaFIM0, linVel0T2);
|
|
linVel1T2 = V4NegMulSub(clinVel1Z_, deltaFIM1, linVel1T2);
|
|
angState0T2 = V4MulAdd(delAngVel0Z, angDetaF0, angState0T2);
|
|
angState1T2 = V4NegMulSub(delAngVel1Z, angDetaF1, angState1T2);
|
|
|
|
base += stride;
|
|
}
|
|
|
|
PX_TRANSPOSE_44(linVel0T0, linVel0T1, linVel0T2, linVel0T3, linVel00, linVel10, linVel20, linVel30);
|
|
PX_TRANSPOSE_44(linVel1T0, linVel1T1, linVel1T2, linVel1T3, linVel01, linVel11, linVel21, linVel31);
|
|
PX_TRANSPOSE_44(angState0T0, angState0T1, angState0T2, angState0T3, angState00, angState10, angState20, angState30);
|
|
PX_TRANSPOSE_44(angState1T0, angState1T1, angState1T2, angState1T3, angState01, angState11, angState21, angState31);
|
|
|
|
PX_ASSERT(b00.linearVelocity.isFinite());
|
|
PX_ASSERT(b00.angularVelocity.isFinite());
|
|
PX_ASSERT(b10.linearVelocity.isFinite());
|
|
PX_ASSERT(b10.angularVelocity.isFinite());
|
|
PX_ASSERT(b20.linearVelocity.isFinite());
|
|
PX_ASSERT(b20.angularVelocity.isFinite());
|
|
PX_ASSERT(b30.linearVelocity.isFinite());
|
|
PX_ASSERT(b30.angularVelocity.isFinite());
|
|
|
|
PX_ASSERT(b01.linearVelocity.isFinite());
|
|
PX_ASSERT(b01.angularVelocity.isFinite());
|
|
PX_ASSERT(b11.linearVelocity.isFinite());
|
|
PX_ASSERT(b11.angularVelocity.isFinite());
|
|
PX_ASSERT(b21.linearVelocity.isFinite());
|
|
PX_ASSERT(b21.angularVelocity.isFinite());
|
|
PX_ASSERT(b31.linearVelocity.isFinite());
|
|
PX_ASSERT(b31.angularVelocity.isFinite());
|
|
|
|
// Write back
|
|
V4StoreA(linVel00, &b00.linearVelocity.x);
|
|
V4StoreA(angState00, &b00.angularVelocity.x);
|
|
V4StoreA(linVel10, &b10.linearVelocity.x);
|
|
V4StoreA(angState10, &b10.angularVelocity.x);
|
|
V4StoreA(linVel20, &b20.linearVelocity.x);
|
|
V4StoreA(angState20, &b20.angularVelocity.x);
|
|
V4StoreA(linVel30, &b30.linearVelocity.x);
|
|
V4StoreA(angState30, &b30.angularVelocity.x);
|
|
|
|
V4StoreA(linVel01, &b01.linearVelocity.x);
|
|
V4StoreA(angState01, &b01.angularVelocity.x);
|
|
V4StoreA(linVel11, &b11.linearVelocity.x);
|
|
V4StoreA(angState11, &b11.angularVelocity.x);
|
|
V4StoreA(linVel21, &b21.linearVelocity.x);
|
|
V4StoreA(angState21, &b21.angularVelocity.x);
|
|
V4StoreA(linVel31, &b31.linearVelocity.x);
|
|
V4StoreA(angState31, &b31.angularVelocity.x);
|
|
|
|
PX_ASSERT(b00.linearVelocity.isFinite());
|
|
PX_ASSERT(b00.angularVelocity.isFinite());
|
|
PX_ASSERT(b10.linearVelocity.isFinite());
|
|
PX_ASSERT(b10.angularVelocity.isFinite());
|
|
PX_ASSERT(b20.linearVelocity.isFinite());
|
|
PX_ASSERT(b20.angularVelocity.isFinite());
|
|
PX_ASSERT(b30.linearVelocity.isFinite());
|
|
PX_ASSERT(b30.angularVelocity.isFinite());
|
|
|
|
PX_ASSERT(b01.linearVelocity.isFinite());
|
|
PX_ASSERT(b01.angularVelocity.isFinite());
|
|
PX_ASSERT(b11.linearVelocity.isFinite());
|
|
PX_ASSERT(b11.angularVelocity.isFinite());
|
|
PX_ASSERT(b21.linearVelocity.isFinite());
|
|
PX_ASSERT(b21.angularVelocity.isFinite());
|
|
PX_ASSERT(b31.linearVelocity.isFinite());
|
|
PX_ASSERT(b31.angularVelocity.isFinite());
|
|
}
|
|
|
|
// Solve-method entry point for a batch of four 1D constraints.
// Forwards the batch's descriptors (starting at hdr.startIndex), the body
// inertia array, the elapsed time and the solver context to solve1DStep4.
void solve1D4(DY_TGS_SOLVE_METHOD_PARAMS)
{
	// minPenetration belongs to the shared solve-method signature but is not
	// passed on to the 1D solver.
	PX_UNUSED(minPenetration);

	solve1DStep4(desc + hdr.startIndex, txInertias, elapsedTime, cache);
}
|
|
|
|
void writeBack1D4(DY_TGS_WRITEBACK_METHOD_PARAMS)
|
|
{
|
|
PX_UNUSED(cache);
|
|
|
|
ConstraintWriteback* writeback0 = reinterpret_cast<ConstraintWriteback*>(desc[hdr.startIndex].writeBack);
|
|
ConstraintWriteback* writeback1 = reinterpret_cast<ConstraintWriteback*>(desc[hdr.startIndex + 1].writeBack);
|
|
ConstraintWriteback* writeback2 = reinterpret_cast<ConstraintWriteback*>(desc[hdr.startIndex + 2].writeBack);
|
|
ConstraintWriteback* writeback3 = reinterpret_cast<ConstraintWriteback*>(desc[hdr.startIndex + 3].writeBack);
|
|
|
|
if (writeback0 || writeback1 || writeback2 || writeback3)
|
|
{
|
|
SolverConstraint1DHeaderStep4* header = reinterpret_cast<SolverConstraint1DHeaderStep4*>(desc[hdr.startIndex].constraint);
|
|
PxU8* base = reinterpret_cast<PxU8*>(desc[hdr.startIndex].constraint + sizeof(SolverConstraint1DHeaderStep4));
|
|
PxU32 stride = cache->contactErrorAccumulator ? sizeof(SolverConstraint1DStep4WithResidual) : sizeof(SolverConstraint1DStep4);
|
|
|
|
const Vec4V zero = V4Zero();
|
|
Vec4V linX(zero), linY(zero), linZ(zero);
|
|
Vec4V angX(zero), angY(zero), angZ(zero);
|
|
Vec4V residual(zero);
|
|
Vec4V residualPosIter(zero);
|
|
|
|
const PxU32 count = header->count;
|
|
for (PxU32 i = 0; i<count; i++)
|
|
{
|
|
const SolverConstraint1DStep4* c = reinterpret_cast<const SolverConstraint1DStep4*>(base);
|
|
|
|
//Load in flags
|
|
const VecI32V flags = I4LoadU(reinterpret_cast<const PxI32*>(&c->flags[0]));
|
|
//Work out masks
|
|
const VecI32V mask = I4Load(DY_SC_FLAG_OUTPUT_FORCE);
|
|
|
|
const VecI32V masked = VecI32V_And(flags, mask);
|
|
const BoolV isEq = VecI32V_IsEq(masked, mask);
|
|
|
|
const Vec4V appliedForce = V4Sel(isEq, c->appliedForce, zero);
|
|
|
|
if (cache->contactErrorAccumulator)
|
|
{
|
|
const SolverConstraint1DStep4WithResidual* cc = static_cast<const SolverConstraint1DStep4WithResidual*>(c);
|
|
residual = V4MulAdd(cc->residualVelIter, cc->residualVelIter, residual);
|
|
residualPosIter = V4MulAdd(cc->residualPosIter, cc->residualPosIter, residualPosIter);
|
|
}
|
|
|
|
linX = V4MulAdd(c->lin0[0], appliedForce, linX);
|
|
linY = V4MulAdd(c->lin0[1], appliedForce, linY);
|
|
linZ = V4MulAdd(c->lin0[2], appliedForce, linZ);
|
|
|
|
angX = V4MulAdd(c->ang0[0], appliedForce, angX);
|
|
angY = V4MulAdd(c->ang0[1], appliedForce, angY);
|
|
angZ = V4MulAdd(c->ang0[2], appliedForce, angZ);
|
|
|
|
base += stride;
|
|
}
|
|
|
|
//We need to do the cross product now
|
|
|
|
angX = V4Sub(angX, V4NegMulSub(header->body0WorkOffset[0], linY, V4Mul(header->body0WorkOffset[1], linZ)));
|
|
angY = V4Sub(angY, V4NegMulSub(header->body0WorkOffset[1], linZ, V4Mul(header->body0WorkOffset[2], linX)));
|
|
angZ = V4Sub(angZ, V4NegMulSub(header->body0WorkOffset[2], linX, V4Mul(header->body0WorkOffset[0], linY)));
|
|
|
|
const Vec4V linLenSq = V4MulAdd(linZ, linZ, V4MulAdd(linY, linY, V4Mul(linX, linX)));
|
|
const Vec4V angLenSq = V4MulAdd(angZ, angZ, V4MulAdd(angY, angY, V4Mul(angX, angX)));
|
|
|
|
const Vec4V linLen = V4Sqrt(linLenSq);
|
|
const Vec4V angLen = V4Sqrt(angLenSq);
|
|
|
|
const BoolV broken = BOr(V4IsGrtr(linLen, header->linBreakImpulse), V4IsGrtr(angLen, header->angBreakImpulse));
|
|
|
|
PX_ALIGN(16, PxU32 iBroken[4]);
|
|
BStoreA(broken, iBroken);
|
|
|
|
PX_ALIGN(16, PxReal residual4[4]);
|
|
V4StoreA(residual, residual4);
|
|
|
|
PX_ALIGN(16, PxReal residual4PosIter[4]);
|
|
V4StoreA(residualPosIter, residual4PosIter);
|
|
|
|
Vec4V lin0, lin1, lin2, lin3;
|
|
Vec4V ang0, ang1, ang2, ang3;
|
|
|
|
PX_TRANSPOSE_34_44(linX, linY, linZ, lin0, lin1, lin2, lin3);
|
|
PX_TRANSPOSE_34_44(angX, angY, angZ, ang0, ang1, ang2, ang3);
|
|
|
|
if (writeback0)
|
|
{
|
|
V3StoreU(Vec3V_From_Vec4V_WUndefined(lin0), writeback0->linearImpulse);
|
|
V3StoreU(Vec3V_From_Vec4V_WUndefined(ang0), writeback0->angularImpulse);
|
|
writeback0->setCombined(header->breakable[0] ? PxU32(iBroken[0] != 0) : 0, residual4PosIter[0]);
|
|
writeback0->residual = residual4[0];
|
|
}
|
|
if (writeback1)
|
|
{
|
|
V3StoreU(Vec3V_From_Vec4V_WUndefined(lin1), writeback1->linearImpulse);
|
|
V3StoreU(Vec3V_From_Vec4V_WUndefined(ang1), writeback1->angularImpulse);
|
|
writeback1->setCombined(header->breakable[1] ? PxU32(iBroken[1] != 0) : 0, residual4PosIter[1]);
|
|
writeback1->residual = residual4[1];
|
|
}
|
|
if (writeback2)
|
|
{
|
|
V3StoreU(Vec3V_From_Vec4V_WUndefined(lin2), writeback2->linearImpulse);
|
|
V3StoreU(Vec3V_From_Vec4V_WUndefined(ang2), writeback2->angularImpulse);
|
|
writeback2->setCombined(header->breakable[2] ? PxU32(iBroken[2] != 0) : 0, residual4PosIter[2]);
|
|
writeback2->residual = residual4[2];
|
|
}
|
|
if (writeback3)
|
|
{
|
|
V3StoreU(Vec3V_From_Vec4V_WUndefined(lin3), writeback3->linearImpulse);
|
|
V3StoreU(Vec3V_From_Vec4V_WUndefined(ang3), writeback3->angularImpulse);
|
|
writeback3->setCombined(header->breakable[3] ? PxU32(iBroken[3] != 0) : 0, residual4PosIter[3]);
|
|
writeback3->residual = residual4[3];
|
|
}
|
|
}
|
|
}
|
|
|
|
static void concludeContact4_Block(const PxSolverConstraintDesc* PX_RESTRICT desc)
|
|
{
|
|
PX_UNUSED(desc);
|
|
//const PxU8* PX_RESTRICT last = desc[0].constraint + getConstraintLength(desc[0]);
|
|
|
|
////hopefully pointer aliasing doesn't bite.
|
|
//PxU8* PX_RESTRICT currPtr = desc[0].constraint;
|
|
|
|
//const Vec4V zero = V4Zero();
|
|
|
|
////const PxU8 type = *desc[0].constraint;
|
|
//const PxU32 contactSize = sizeof(SolverContactPointStepBlock);
|
|
//const PxU32 frictionSize = sizeof(SolverContactFrictionStepBlock);
|
|
|
|
//while ((currPtr < last))
|
|
//{
|
|
// SolverContactHeaderStepBlock* PX_RESTRICT hdr = reinterpret_cast<SolverContactHeaderStepBlock*>(currPtr);
|
|
|
|
// currPtr = reinterpret_cast<PxU8*>(hdr + 1);
|
|
|
|
// const PxU32 numNormalConstr = hdr->numNormalConstr;
|
|
// const PxU32 numFrictionConstr = hdr->numFrictionConstr;
|
|
|
|
// //Applied forces
|
|
// currPtr += sizeof(Vec4V)*numNormalConstr;
|
|
|
|
// //SolverContactPointStepBlock* PX_RESTRICT contacts = reinterpret_cast<SolverContactPointStepBlock*>(currPtr);
|
|
// currPtr += (numNormalConstr * contactSize);
|
|
|
|
// bool hasMaxImpulse = (hdr->flag & SolverContactHeader4::eHAS_MAX_IMPULSE) != 0;
|
|
|
|
// if (hasMaxImpulse)
|
|
// currPtr += sizeof(Vec4V) * numNormalConstr;
|
|
|
|
// currPtr += sizeof(Vec4V)*numFrictionConstr;
|
|
|
|
// SolverContactFrictionStepBlock* PX_RESTRICT frictions = reinterpret_cast<SolverContactFrictionStepBlock*>(currPtr);
|
|
// currPtr += (numFrictionConstr * frictionSize);
|
|
|
|
// /*for (PxU32 i = 0; i<numNormalConstr; i++)
|
|
// {
|
|
// contacts[i].biasCoefficient = V4Sel(V4IsGrtr(contacts[i].separation, zero), contacts[i].biasCoefficient, zero);
|
|
// }*/
|
|
|
|
// for (PxU32 i = 0; i<numFrictionConstr; i++)
|
|
// {
|
|
// frictions[i].biasCoefficient = zero;
|
|
// }
|
|
//}
|
|
}
|
|
|
|
static void conclude1DStep4(const PxSolverConstraintDesc* PX_RESTRICT desc, bool isResidualReportingEnabled)
|
|
{
|
|
PxU8* PX_RESTRICT bPtr = desc->constraint;
|
|
if (bPtr == NULL)
|
|
return;
|
|
|
|
const SolverConstraint1DHeaderStep4* PX_RESTRICT header = reinterpret_cast<const SolverConstraint1DHeaderStep4*>(bPtr);
|
|
PxU8* PX_RESTRICT base = reinterpret_cast<PxU8*>(bPtr + sizeof(SolverConstraint1DHeaderStep4));
|
|
PxU32 stride = isResidualReportingEnabled ? sizeof(SolverConstraint1DStep4WithResidual) : sizeof(SolverConstraint1DStep4);
|
|
|
|
const VecI32V keepBiasMask = I4Load(DY_SC_FLAG_KEEP_BIAS);
|
|
const VecI32V isSpringMask = I4Load(DY_SC_FLAG_SPRING);
|
|
const Vec4V zero = V4Zero();
|
|
|
|
const PxU32 count = header->count;
|
|
for (PxU32 i = 0; i<count; ++i/*, base++*/)
|
|
{
|
|
PxPrefetchLine(base + 1);
|
|
SolverConstraint1DStep4& c = reinterpret_cast<SolverConstraint1DStep4&>(*base);
|
|
|
|
const VecI32V flags = I4LoadA(reinterpret_cast<PxI32*>(c.flags));
|
|
|
|
const BoolV keepBias = VecI32V_IsEq(VecI32V_And(flags, keepBiasMask), keepBiasMask);
|
|
c.biasScale = V4Sel(keepBias, c.biasScale, zero);
|
|
c.error = V4Sel(keepBias, c.error, zero);
|
|
|
|
const BoolV isSpring = VecI32V_IsEq(VecI32V_And(flags, isSpringMask), isSpringMask);
|
|
c.biasScale = V4Sel(isSpring, zero, c.biasScale);
|
|
c.error = V4Sel(isSpring, zero, c.error);
|
|
c.velMultiplier = V4Sel(isSpring, zero, c.velMultiplier);
|
|
c.velTarget = V4Sel(isSpring, zero, c.velTarget);
|
|
|
|
base += stride;
|
|
}
|
|
}
|
|
|
|
// Solve-and-conclude entry point for a batch of 4 TGS contact constraints.
//
// Runs solveContact4_Block on the batch that starts at desc[hdr.startIndex] and then hands the same
// batch to concludeContact4_Block. txInertias is not needed on this path.
//
// NOTE(review): the literal arguments 'true' and '-PX_MAX_F32' presumably mean "solve friction" and
// "no minimum-penetration cut-off" - confirm against the solveContact4_Block signature.
void solveConcludeContact4(DY_TGS_CONCLUDE_METHOD_PARAMS)
{
	PX_UNUSED(txInertias);

	solveContact4_Block(&desc[hdr.startIndex], true, -PX_MAX_F32, elapsedTime, cache);

	concludeContact4_Block(&desc[hdr.startIndex]);
}
|
|
|
|
// Solve-and-conclude entry point for a batch of 4 TGS 1D constraints.
//
// Runs solve1DStep4 on the batch that starts at desc[hdr.startIndex] and then conclude1DStep4 on the
// same batch. The conclude pass is told to use the "with residual" row layout whenever the solver
// context carries a contact error accumulator.
void solveConclude1D4(DY_TGS_CONCLUDE_METHOD_PARAMS)
{
	//Every parameter is used here, so no PX_UNUSED markers are required.
	solve1DStep4(&desc[hdr.startIndex], txInertias, elapsedTime, cache);

	//Evaluated after the solve, exactly where the original expression was evaluated.
	const bool residualReportingEnabled = (cache.contactErrorAccumulator != NULL);
	conclude1DStep4(&desc[hdr.startIndex], residualReportingEnabled);
}
|
|
|
|
}
|
|
}
|
|
|