1181 lines
51 KiB
Plaintext
1181 lines
51 KiB
Plaintext
|
|
// Redistribution and use in source and binary forms, with or without
|
||
|
|
// modification, are permitted provided that the following conditions
|
||
|
|
// are met:
|
||
|
|
// * Redistributions of source code must retain the above copyright
|
||
|
|
// notice, this list of conditions and the following disclaimer.
|
||
|
|
// * Redistributions in binary form must reproduce the above copyright
|
||
|
|
// notice, this list of conditions and the following disclaimer in the
|
||
|
|
// documentation and/or other materials provided with the distribution.
|
||
|
|
// * Neither the name of NVIDIA CORPORATION nor the names of its
|
||
|
|
// contributors may be used to endorse or promote products derived
|
||
|
|
// from this software without specific prior written permission.
|
||
|
|
//
|
||
|
|
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
|
||
|
|
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||
|
|
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||
|
|
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||
|
|
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||
|
|
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||
|
|
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||
|
|
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||
|
|
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||
|
|
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||
|
|
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||
|
|
//
|
||
|
|
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
|
||
|
|
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
|
||
|
|
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
|
||
|
|
|
||
|
|
#ifndef __SOLVER_BLOCK_TGS_CUH__
|
||
|
|
#define __SOLVER_BLOCK_TGS_CUH__
|
||
|
|
|
||
|
|
#include "common/PxPhysXCommonConfig.h"
|
||
|
|
#include <cuda.h>
|
||
|
|
#include <sm_35_intrinsics.h>
|
||
|
|
#include "PxgSolverBody.h"
|
||
|
|
//#include "PxgSolverConstraint1D.h"
|
||
|
|
#include "PxgSolverConstraintBlock1D.h"
|
||
|
|
#include "PxgSolverConstraintDesc.h"
|
||
|
|
#include "PxgConstraint.h"
|
||
|
|
#include "PxgConstraintBlock.h"
|
||
|
|
#include "PxgIslandContext.h"
|
||
|
|
#include "PxgSolverContext.h"
|
||
|
|
#include "cutil_math.h"
|
||
|
|
#include "PxgSolverCoreDesc.h"
|
||
|
|
#include "DyThresholdTable.h"
|
||
|
|
#include "PxgFrictionPatch.h"
|
||
|
|
#include "foundation/PxUtilities.h"
|
||
|
|
#include "PxgConstraintWriteBack.h"
|
||
|
|
#include "PxgSolverFlags.h"
|
||
|
|
#include "stdio.h"
|
||
|
|
#include "assert.h"
|
||
|
|
#include "PxgIntrinsics.h"
|
||
|
|
#include "solverResidual.cuh"
|
||
|
|
|
||
|
|
#include "DyCpuGpu1dConstraint.h"
|
||
|
|
|
||
|
|
#include "solverBlockCommon.cuh"
|
||
|
|
#include "constraintPrepShared.cuh"
|
||
|
|
|
||
|
|
using namespace physx;
|
||
|
|
|
||
|
|
static __forceinline__ __device__ float4 shfl(const PxU32 syncMask, const float4 f, const PxU32 index)
|
||
|
|
{
|
||
|
|
float4 ret;
|
||
|
|
ret.x = __shfl_sync(syncMask, f.x, index);
|
||
|
|
ret.y = __shfl_sync(syncMask, f.y, index);
|
||
|
|
ret.z = __shfl_sync(syncMask, f.z, index);
|
||
|
|
ret.w = __shfl_sync(syncMask, f.w, index);
|
||
|
|
|
||
|
|
return ret;
|
||
|
|
}
|
||
|
|
|
||
|
|
//Loads a set of TxIData inertia tensors quickly using shuffles
|
||
|
|
static __device__ void loadTxInertia(const PxU32 syncMask, const PxgSolverTxIData* datas, const PxU32 bodyIndex, const PxU32 nbToLoad, const PxU32 threadIndexInWarp, PxgSolverTxIData& out)
|
||
|
|
{
|
||
|
|
if (1)
|
||
|
|
{
|
||
|
|
{
|
||
|
|
float4 data0, data1, data2, data3;
|
||
|
|
|
||
|
|
//There are 4x float4 in a PxgSolverTxIData
|
||
|
|
const PxU32 elementIndex = threadIndexInWarp / 4;
|
||
|
|
|
||
|
|
const PxU32 index0 = __shfl_sync(syncMask, bodyIndex, elementIndex);
|
||
|
|
const PxU32 index1 = __shfl_sync(syncMask, bodyIndex, elementIndex + 8);
|
||
|
|
const PxU32 index2 = __shfl_sync(syncMask, bodyIndex, elementIndex + 16);
|
||
|
|
const PxU32 index3 = __shfl_sync(syncMask, bodyIndex, elementIndex + 24);
|
||
|
|
if (elementIndex < nbToLoad)
|
||
|
|
{
|
||
|
|
data0 = reinterpret_cast<const float4*>(&datas[index0])[threadIndexInWarp & 3];
|
||
|
|
if ((elementIndex + 8) < nbToLoad)
|
||
|
|
{
|
||
|
|
data1 = reinterpret_cast<const float4*>(&datas[index1])[threadIndexInWarp & 3];
|
||
|
|
if ((elementIndex + 16) < nbToLoad)
|
||
|
|
{
|
||
|
|
data2 = reinterpret_cast<const float4*>(&datas[index2])[threadIndexInWarp & 3];
|
||
|
|
if ((elementIndex + 24) < nbToLoad)
|
||
|
|
{
|
||
|
|
data3 = reinterpret_cast<const float4*>(&datas[index3])[threadIndexInWarp & 3];
|
||
|
|
}
|
||
|
|
}
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
//OK. We have our 4 vectors...now we have to shuffle them
|
||
|
|
//Thread 0 has the first float4 for T0, T8, T16, T24
|
||
|
|
//Thread 1 has the 2nd float4 for T0, T8, T16, T24 ...
|
||
|
|
//Thread 4 has the first float4 T1, T9, T17, T25...
|
||
|
|
|
||
|
|
const PxU32 idxAnd3 = threadIndexInWarp & 3;
|
||
|
|
|
||
|
|
float4 swapVal0 = idxAnd3 == 0 ? data0 : idxAnd3 == 1 ? data1 : idxAnd3 == 2 ? data2 : data3;
|
||
|
|
float4 swapVal1 = idxAnd3 == 0 ? data3 : idxAnd3 == 1 ? data0 : idxAnd3 == 2 ? data1 : data2;
|
||
|
|
float4 swapVal2 = idxAnd3 == 0 ? data2 : idxAnd3 == 1 ? data3 : idxAnd3 == 2 ? data0 : data1;
|
||
|
|
float4 swapVal3 = idxAnd3 == 0 ? data1 : idxAnd3 == 1 ? data2 : idxAnd3 == 2 ? data3 : data0;
|
||
|
|
|
||
|
|
const PxU32 threadReadBase = ((threadIndexInWarp * 4) & 31);
|
||
|
|
|
||
|
|
const PxU32 offset = threadIndexInWarp / 8;
|
||
|
|
|
||
|
|
float4 val0 = shfl(syncMask, swapVal0, threadReadBase + offset);
|
||
|
|
float4 val1 = shfl(syncMask, swapVal1, threadReadBase + ((offset + 1) & 3));
|
||
|
|
float4 val2 = shfl(syncMask, swapVal2, threadReadBase + ((offset + 2) & 3));
|
||
|
|
float4 val3 = shfl(syncMask, swapVal3, threadReadBase + ((offset + 3) & 3));
|
||
|
|
|
||
|
|
float4 shuffled0 = offset == 0 ? val0 : offset == 1 ? val3 : offset == 2 ? val2 : val1;
|
||
|
|
float4 shuffled1 = offset == 0 ? val1 : offset == 1 ? val0 : offset == 2 ? val3 : val2;
|
||
|
|
float4 shuffled2 = offset == 0 ? val2 : offset == 1 ? val1 : offset == 2 ? val0 : val3;
|
||
|
|
float4 shuffled3 = offset == 0 ? val3 : offset == 1 ? val2 : offset == 2 ? val1 : val0;
|
||
|
|
|
||
|
|
//Now copy into the structure...
|
||
|
|
out.deltaBody2World.q.x = shuffled0.x; out.deltaBody2World.q.y = shuffled0.y; out.deltaBody2World.q.z = shuffled0.z; out.deltaBody2World.q.w = shuffled0.w;
|
||
|
|
out.deltaBody2World.p.x = shuffled1.x; out.deltaBody2World.p.y = shuffled1.y; out.deltaBody2World.p.z = shuffled1.z;
|
||
|
|
out.sqrtInvInertia.column0.x = shuffled1.w; out.sqrtInvInertia.column0.y = shuffled2.x; out.sqrtInvInertia.column0.z = shuffled2.y;
|
||
|
|
out.sqrtInvInertia.column1.x = shuffled2.z; out.sqrtInvInertia.column1.y = shuffled2.w; out.sqrtInvInertia.column1.z = shuffled3.x;
|
||
|
|
out.sqrtInvInertia.column2.x = shuffled3.y; out.sqrtInvInertia.column2.y = shuffled3.z; out.sqrtInvInertia.column2.z = shuffled3.w;
|
||
|
|
|
||
|
|
}
|
||
|
|
}
|
||
|
|
else
|
||
|
|
{
|
||
|
|
if (threadIndexInWarp < nbToLoad)
|
||
|
|
out = datas[bodyIndex];
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
// Using the same logic as the previous implementation, but mass-splitting is additionally performed per sub-timestep.
|
||
|
|
// Required data is packed and stored differently from the previous implementation to split mass at each sub-timestep;
|
||
|
|
// i.e., mass-related terms are computed at every sub-timestep.
|
||
|
|
// Refer to "Mass Splitting for Jitter-Free Parallel Rigid Body Simulation" for the general mass-splitting idea.
|
||
|
|
|
||
|
|
static __device__ void solveContactBlockTGS(const PxgBlockConstraintBatch& batch, PxVec3& b0LinVel, PxVec3& b0AngVel, PxVec3& b1LinVel, PxVec3& b1AngVel,
|
||
|
|
const PxVec3& b0LinDelta, const PxVec3& b0AngDelta, const PxVec3& b1LinDelta, const PxVec3& b1AngDelta,
|
||
|
|
const PxU32 threadIndex, const PxgTGSBlockSolverContactHeader* PX_RESTRICT contactHeaders,
|
||
|
|
PxgTGSBlockSolverFrictionHeader* PX_RESTRICT frictionHeaders, PxgTGSBlockSolverContactPoint* PX_RESTRICT contactPoints,
|
||
|
|
PxgTGSBlockSolverContactFriction* PX_RESTRICT frictionPoints,
|
||
|
|
const PxReal elapsedTime, const PxReal minPen, PxgErrorAccumulator* error,
|
||
|
|
PxReal ref0 = 1.f, PxReal ref1 = 1.f)
|
||
|
|
{
|
||
|
|
using namespace physx;
|
||
|
|
|
||
|
|
PxVec3 linVel0(b0LinVel.x, b0LinVel.y, b0LinVel.z);
|
||
|
|
PxVec3 linVel1(b1LinVel.x, b1LinVel.y, b1LinVel.z);
|
||
|
|
PxVec3 angVel0(b0AngVel.x, b0AngVel.y, b0AngVel.z);
|
||
|
|
PxVec3 angVel1(b1AngVel.x, b1AngVel.y, b1AngVel.z);
|
||
|
|
|
||
|
|
float accumulatedNormalImpulse = 0.f;
|
||
|
|
|
||
|
|
{
|
||
|
|
const PxgTGSBlockSolverContactHeader* PX_RESTRICT contactHeader = &contactHeaders[batch.mConstraintBatchIndex];
|
||
|
|
PxgTGSBlockSolverFrictionHeader* PX_RESTRICT frictionHeader = &frictionHeaders[batch.mConstraintBatchIndex];
|
||
|
|
PxgTGSBlockSolverContactFriction* PX_RESTRICT frictions = &frictionPoints[batch.startFrictionIndex];
|
||
|
|
|
||
|
|
const uint numNormalConstr = Pxldcs(contactHeader->numNormalConstr[threadIndex]);
|
||
|
|
const uint numFrictionConstrTotal = Pxldcs(frictionHeader->numFrictionConstr[threadIndex]);
|
||
|
|
const uint numFrictionConstr = numFrictionConstrTotal & (~1);
|
||
|
|
|
||
|
|
float accumDeltaF = 0.f;
|
||
|
|
|
||
|
|
if (numNormalConstr)
|
||
|
|
{
|
||
|
|
const PxReal maxPenBias = Pxldcs(contactHeader->maxPenBias[threadIndex]);
|
||
|
|
|
||
|
|
PxgTGSBlockSolverContactPoint* PX_RESTRICT contacts = &contactPoints[batch.startConstraintIndex];
|
||
|
|
const float4 invMass0_1_angDom0_1 = Pxldcs(contactHeader->invMass0_1_angDom0_1[threadIndex]);
|
||
|
|
|
||
|
|
const float invMassA = ref0 * invMass0_1_angDom0_1.x;
|
||
|
|
const float invMassB = ref1 * invMass0_1_angDom0_1.y;
|
||
|
|
|
||
|
|
const float angDom0 = ref0 * invMass0_1_angDom0_1.z;
|
||
|
|
const float angDom1 = ref1 * invMass0_1_angDom0_1.w;
|
||
|
|
|
||
|
|
const float sumInvMass = invMassA + invMassB;
|
||
|
|
|
||
|
|
const float4 normal_staticFriction = Pxldcg(contactHeader->normal_staticFriction[threadIndex]);
|
||
|
|
|
||
|
|
const PxVec3 normal = PxVec3(normal_staticFriction.x, normal_staticFriction.y, normal_staticFriction.z);
|
||
|
|
|
||
|
|
const float restitutionXdt = contactHeader->restitutionXdt[threadIndex];
|
||
|
|
const float p8 = contactHeader->p8[threadIndex]; // using previous p8 value in the prep step.
|
||
|
|
const PxU8 flags = contactHeader->flags[threadIndex];
|
||
|
|
|
||
|
|
const PxVec3 delLinVel0 = normal * invMassA;
|
||
|
|
const PxVec3 delLinVel1 = normal * invMassB;
|
||
|
|
|
||
|
|
//Bring forward a read event
|
||
|
|
const float staticFrictionCof = normal_staticFriction.w;
|
||
|
|
|
||
|
|
const PxVec3 relMotion = b0LinDelta - b1LinDelta;
|
||
|
|
|
||
|
|
const float deltaV = normal.dot(relMotion);
|
||
|
|
|
||
|
|
float relVel1 = (linVel0 - linVel1).dot(normal);
|
||
|
|
|
||
|
|
float4 next_raXn_extraCoeff = Pxldcs(contacts->raXn_extraCoeff[threadIndex]);
|
||
|
|
float4 next_rbXn_targetVelW = Pxldcs(contacts->rbXn_targetVelW[threadIndex]);
|
||
|
|
float next_appliedForce = Pxldcs(contacts->appliedForce[threadIndex]);
|
||
|
|
float next_error = Pxldcs(contacts->separation[threadIndex]);
|
||
|
|
float next_maxImpulse = Pxldcs(contacts->maxImpulse[threadIndex]);
|
||
|
|
float next_biasCoefficient = Pxldcs(contacts->biasCoefficient[threadIndex]);
|
||
|
|
|
||
|
|
float next_resp0 = ref0 * Pxldcs(contacts->resp0[threadIndex]);
|
||
|
|
float next_resp1 = ref1 * Pxldcs(contacts->resp1[threadIndex]);
|
||
|
|
|
||
|
|
float next_unitResponse = next_resp0 + next_resp1;
|
||
|
|
float next_recipResponse = (next_unitResponse > 0.f) ? (1.f / next_unitResponse) : 0.f;
|
||
|
|
float next_velMultiplier = next_recipResponse;
|
||
|
|
|
||
|
|
if (restitutionXdt < 0.f)
|
||
|
|
{
|
||
|
|
computeCompliantContactCoefficientsTGS(flags, restitutionXdt, next_unitResponse,
|
||
|
|
next_recipResponse, next_raXn_extraCoeff.w, next_velMultiplier,
|
||
|
|
next_biasCoefficient);
|
||
|
|
}
|
||
|
|
|
||
|
|
{
|
||
|
|
for (uint i = 0; i < numNormalConstr; i++)
|
||
|
|
{
|
||
|
|
PxgTGSBlockSolverContactPoint& c = contacts[i];
|
||
|
|
|
||
|
|
const float4 raXn_contactCoeff = next_raXn_extraCoeff;
|
||
|
|
const float velMultiplier = next_velMultiplier;
|
||
|
|
const float4 rbXn_targetVelW = next_rbXn_targetVelW;
|
||
|
|
const float appliedForce = next_appliedForce;
|
||
|
|
const float separation = next_error;
|
||
|
|
const float maxImpulse = next_maxImpulse;
|
||
|
|
const float biasCoefficient = next_biasCoefficient;
|
||
|
|
const float recipResponse = next_recipResponse;
|
||
|
|
|
||
|
|
if ((i + 1) < numNormalConstr)
|
||
|
|
{
|
||
|
|
const PxgTGSBlockSolverContactPoint& nextC = contacts[i + 1];
|
||
|
|
|
||
|
|
next_raXn_extraCoeff = Pxldcs(nextC.raXn_extraCoeff[threadIndex]);
|
||
|
|
next_rbXn_targetVelW = Pxldcs(nextC.rbXn_targetVelW[threadIndex]);
|
||
|
|
next_appliedForce = Pxldcs(nextC.appliedForce[threadIndex]);
|
||
|
|
next_error = Pxldcs(nextC.separation[threadIndex]);
|
||
|
|
next_maxImpulse = Pxldcs(nextC.maxImpulse[threadIndex]);
|
||
|
|
next_biasCoefficient = Pxldcs(nextC.biasCoefficient[threadIndex]);
|
||
|
|
|
||
|
|
next_resp0 = ref0 * Pxldcs(nextC.resp0[threadIndex]);
|
||
|
|
next_resp1 = ref1 * Pxldcs(nextC.resp1[threadIndex]);
|
||
|
|
|
||
|
|
next_unitResponse = next_resp0 + next_resp1;
|
||
|
|
next_recipResponse = (next_unitResponse > 0.f) ? (1.f / next_unitResponse) : 0.f;
|
||
|
|
|
||
|
|
next_velMultiplier = next_recipResponse;
|
||
|
|
|
||
|
|
if (restitutionXdt < 0.f)
|
||
|
|
{
|
||
|
|
computeCompliantContactCoefficientsTGS(flags, restitutionXdt, next_unitResponse,
|
||
|
|
next_recipResponse, next_raXn_extraCoeff.w, next_velMultiplier,
|
||
|
|
next_biasCoefficient);
|
||
|
|
}
|
||
|
|
}
|
||
|
|
else if (numFrictionConstr)
|
||
|
|
{
|
||
|
|
next_raXn_extraCoeff = Pxldcs(frictions[0].raXn_error[threadIndex]);
|
||
|
|
next_rbXn_targetVelW = Pxldcs(frictions[0].rbXn_targetVelW[threadIndex]);
|
||
|
|
next_error = next_raXn_extraCoeff.w;
|
||
|
|
next_appliedForce = Pxldcs(frictions[0].appliedForce[threadIndex]);
|
||
|
|
|
||
|
|
next_resp0 = ref0 * Pxldcs(frictions[0].resp0[threadIndex]);
|
||
|
|
next_resp1 = ref1 * Pxldcs(frictions[0].resp1[threadIndex]);
|
||
|
|
|
||
|
|
const float next_resp = next_resp0 + next_resp1;
|
||
|
|
next_velMultiplier = (next_resp > 0.f) ? (p8 / next_resp) : 0.f;
|
||
|
|
}
|
||
|
|
|
||
|
|
const PxVec3 raXn = PxVec3(raXn_contactCoeff.x, raXn_contactCoeff.y, raXn_contactCoeff.z);
|
||
|
|
const PxVec3 rbXn = PxVec3(rbXn_targetVelW.x, rbXn_targetVelW.y, rbXn_targetVelW.z);
|
||
|
|
const float targetVel = rbXn_targetVelW.w;
|
||
|
|
|
||
|
|
//Compute the normal velocity of the constraint.
|
||
|
|
const PxReal v0 = angVel0.dot(raXn);
|
||
|
|
const PxReal v1 = angVel1.dot(rbXn);
|
||
|
|
const float normalVel = relVel1 + (v0 - v1);
|
||
|
|
|
||
|
|
const float sep = PxMax(minPen, separation + deltaV + b0AngDelta.dot(raXn) - b1AngDelta.dot(rbXn));
|
||
|
|
|
||
|
|
//How much the target vel should have changed the position of this constraint...
|
||
|
|
const PxReal tVelErr = targetVel * elapsedTime;
|
||
|
|
|
||
|
|
const PxReal biasedErr = recipResponse * fminf(-maxPenBias, biasCoefficient * (sep - tVelErr));
|
||
|
|
|
||
|
|
//KS - clamp the maximum force
|
||
|
|
const float tempDeltaF = biasedErr - (normalVel - targetVel) * velMultiplier;
|
||
|
|
const float _deltaF = fmaxf(tempDeltaF, -appliedForce);//FMax(FNegScaleSub(normalVel, velMultiplier, biasedErr), FNeg(appliedForce));
|
||
|
|
const float _newForce = appliedForce + _deltaF;
|
||
|
|
const float newForce = fminf(_newForce, maxImpulse);//FMin(_newForce, maxImpulse);
|
||
|
|
const float deltaF = newForce - appliedForce;
|
||
|
|
|
||
|
|
accumDeltaF += deltaF;
|
||
|
|
|
||
|
|
relVel1 += sumInvMass * deltaF;
|
||
|
|
|
||
|
|
angVel0 += raXn * (deltaF * angDom0);
|
||
|
|
angVel1 -= rbXn * (deltaF * angDom1);
|
||
|
|
|
||
|
|
if(error)
|
||
|
|
error->accumulateErrorLocal(deltaF, velMultiplier);
|
||
|
|
|
||
|
|
Pxstcs(&c.appliedForce[threadIndex], newForce);
|
||
|
|
|
||
|
|
accumulatedNormalImpulse = accumulatedNormalImpulse + newForce;
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
linVel0 += delLinVel0 * accumDeltaF;
|
||
|
|
linVel1 -= delLinVel1 * accumDeltaF;
|
||
|
|
|
||
|
|
if (numFrictionConstr)
|
||
|
|
{
|
||
|
|
const float biasCoefficient = Pxldcg(frictionHeader->biasCoefficient[threadIndex]);
|
||
|
|
|
||
|
|
const float dynamicFrictionCof = Pxldcg(frictionHeader->dynamicFriction[threadIndex]);
|
||
|
|
const float maxFrictionImpulse = staticFrictionCof * accumulatedNormalImpulse;
|
||
|
|
const float maxDynFrictionImpulse = dynamicFrictionCof * accumulatedNormalImpulse;
|
||
|
|
|
||
|
|
PxU32 broken = 0;
|
||
|
|
|
||
|
|
const float4 frictionNormal0 = Pxldcg(frictionHeader->frictionNormals[0][threadIndex]);
|
||
|
|
const float4 frictionNormal1 = Pxldcg(frictionHeader->frictionNormals[1][threadIndex]);
|
||
|
|
const PxVec3 normal0 = PxVec3(frictionNormal0.x, frictionNormal0.y, frictionNormal0.z);
|
||
|
|
const PxVec3 normal1 = PxVec3(frictionNormal1.x, frictionNormal1.y, frictionNormal1.z);
|
||
|
|
|
||
|
|
const PxVec3 delLinVel00 = normal0 * invMassA;
|
||
|
|
const PxVec3 delLinVel10 = normal0 * invMassB;
|
||
|
|
const PxVec3 delLinVel01 = normal1 * invMassA;
|
||
|
|
const PxVec3 delLinVel11 = normal1 * invMassB;
|
||
|
|
|
||
|
|
float relDelta0 = relMotion.dot(normal0);
|
||
|
|
float relDelta1 = relMotion.dot(normal1);
|
||
|
|
|
||
|
|
for (uint i = 0; i < numFrictionConstr; i += 2)
|
||
|
|
{
|
||
|
|
PxgTGSBlockSolverContactFriction& f0 = frictions[i];
|
||
|
|
PxgTGSBlockSolverContactFriction& f1 = frictions[i + 1];
|
||
|
|
|
||
|
|
const float4 raXn_extraCoeff1 = f1.raXn_error[threadIndex];
|
||
|
|
const float4 rbXn_targetVelW1 = f1.rbXn_targetVelW[threadIndex];
|
||
|
|
const float initialError1 = raXn_extraCoeff1.w;
|
||
|
|
const float appliedForce1 = f1.appliedForce[threadIndex];
|
||
|
|
const float targetVel1 = rbXn_targetVelW1.w;
|
||
|
|
|
||
|
|
const float4 raXn_extraCoeff0 = next_raXn_extraCoeff;
|
||
|
|
const float4 rbXn_targetVelW0 = next_rbXn_targetVelW;
|
||
|
|
const float initialError0 = next_error;
|
||
|
|
const float appliedForce0 = next_appliedForce;
|
||
|
|
const float targetVel0 = rbXn_targetVelW0.w;
|
||
|
|
|
||
|
|
const float f1_resp0 = ref0 * f1.resp0[threadIndex];
|
||
|
|
const float f1_resp1 = ref1 * f1.resp1[threadIndex];
|
||
|
|
const float f1_resp = f1_resp0 + f1_resp1;
|
||
|
|
const float velMultiplier1 = (f1_resp > 0.f) ? (p8 / f1_resp) : 0.f;
|
||
|
|
const float velMultiplier0 = next_velMultiplier;
|
||
|
|
|
||
|
|
if ((i + 2) < numFrictionConstrTotal)
|
||
|
|
{
|
||
|
|
next_raXn_extraCoeff = Pxldcs(frictions[i + 2].raXn_error[threadIndex]);
|
||
|
|
next_rbXn_targetVelW = Pxldcs(frictions[i + 2].rbXn_targetVelW[threadIndex]);
|
||
|
|
next_error = next_raXn_extraCoeff.w;
|
||
|
|
next_appliedForce = Pxldcs(frictions[i + 2].appliedForce[threadIndex]);
|
||
|
|
|
||
|
|
next_resp0 = ref0 * Pxldcs(frictions[i + 2].resp0[threadIndex]);
|
||
|
|
next_resp1 = ref1 * Pxldcs(frictions[i + 2].resp1[threadIndex]);
|
||
|
|
|
||
|
|
const float next_resp = next_resp0 + next_resp1;
|
||
|
|
next_velMultiplier = (next_resp > 0.f) ? (p8 / next_resp) : 0.f;
|
||
|
|
}
|
||
|
|
|
||
|
|
const PxVec3 raXn0 = PxVec3(raXn_extraCoeff0.x, raXn_extraCoeff0.y, raXn_extraCoeff0.z);
|
||
|
|
const PxVec3 rbXn0 = PxVec3(rbXn_targetVelW0.x, rbXn_targetVelW0.y, rbXn_targetVelW0.z);
|
||
|
|
const PxReal v00 = angVel0.dot(raXn0) + linVel0.dot(normal0);
|
||
|
|
const PxReal v10 = angVel1.dot(rbXn0) + linVel1.dot(normal0);
|
||
|
|
const float normalVel0 = v00 - v10;
|
||
|
|
|
||
|
|
const float error0 = initialError0 - targetVel0 * elapsedTime +
|
||
|
|
raXn0.dot(b0AngDelta) - rbXn0.dot(b1AngDelta) + relDelta0;
|
||
|
|
|
||
|
|
const float bias0 = error0 * biasCoefficient;
|
||
|
|
const float tmp10 = appliedForce0 - (bias0 - targetVel0) * velMultiplier0;
|
||
|
|
const float totalImpulse0 = tmp10 - normalVel0 * velMultiplier0;
|
||
|
|
|
||
|
|
const PxVec3 raXn1 = PxVec3(raXn_extraCoeff1.x, raXn_extraCoeff1.y, raXn_extraCoeff1.z);
|
||
|
|
const PxVec3 rbXn1 = PxVec3(rbXn_targetVelW1.x, rbXn_targetVelW1.y, rbXn_targetVelW1.z);
|
||
|
|
|
||
|
|
const PxReal v01 = angVel0.dot(raXn1) + linVel0.dot(normal1);
|
||
|
|
const PxReal v11 = angVel1.dot(rbXn1) + linVel1.dot(normal1);
|
||
|
|
const float normalVel1 = v01 - v11;
|
||
|
|
|
||
|
|
const float error1 = initialError1 - targetVel1 * elapsedTime +
|
||
|
|
raXn1.dot(b0AngDelta) - rbXn1.dot(b1AngDelta) + relDelta1;
|
||
|
|
|
||
|
|
const float bias1 = error1 * biasCoefficient;
|
||
|
|
const float tmp11 = appliedForce1 - (bias1 - targetVel1) * velMultiplier1;
|
||
|
|
const float totalImpulse1 = tmp11 - normalVel1 * velMultiplier1;
|
||
|
|
|
||
|
|
const float totalImpulse = PxSqrt(totalImpulse0 * totalImpulse0 + totalImpulse1 * totalImpulse1);
|
||
|
|
|
||
|
|
const bool clamp = totalImpulse > maxFrictionImpulse;
|
||
|
|
|
||
|
|
const float ratio = clamp ? fminf(maxDynFrictionImpulse, totalImpulse) / totalImpulse : 1.f;
|
||
|
|
|
||
|
|
const float newAppliedForce0 = totalImpulse0 * ratio;
|
||
|
|
const float newAppliedForce1 = totalImpulse1 * ratio;
|
||
|
|
|
||
|
|
float deltaF0 = newAppliedForce0 - appliedForce0;
|
||
|
|
float deltaF1 = newAppliedForce1 - appliedForce1;
|
||
|
|
|
||
|
|
if (error)
|
||
|
|
error->accumulateErrorLocal(deltaF0, deltaF1, velMultiplier0, velMultiplier1);
|
||
|
|
|
||
|
|
linVel0 += delLinVel00 * deltaF0;
|
||
|
|
linVel1 -= delLinVel10 * deltaF0;
|
||
|
|
angVel0 += raXn0 * (deltaF0 * angDom0);
|
||
|
|
angVel1 -= rbXn0 * (deltaF0 * angDom1);
|
||
|
|
|
||
|
|
linVel0 += delLinVel01 * deltaF1;
|
||
|
|
linVel1 -= delLinVel11 * deltaF1;
|
||
|
|
angVel0 += raXn1 * (deltaF1 * angDom0);
|
||
|
|
angVel1 -= rbXn1 * (deltaF1 * angDom1);
|
||
|
|
|
||
|
|
Pxstcs(&f0.appliedForce[threadIndex], newAppliedForce0);
|
||
|
|
Pxstcs(&f1.appliedForce[threadIndex], newAppliedForce1);
|
||
|
|
broken = broken | clamp;
|
||
|
|
}
|
||
|
|
|
||
|
|
if (numFrictionConstr < numFrictionConstrTotal)
|
||
|
|
{
|
||
|
|
const PxReal frictionScale = frictionHeader->torsionalFrictionScale[threadIndex];
|
||
|
|
|
||
|
|
//We have a torsional friction anchor, solve this...
|
||
|
|
PxgTGSBlockSolverContactFriction& f0 = frictions[numFrictionConstr];
|
||
|
|
const float4 raXn_extraCoeff0 = next_raXn_extraCoeff;
|
||
|
|
const PxVec3 raXn0 = PxVec3(raXn_extraCoeff0.x, raXn_extraCoeff0.y, raXn_extraCoeff0.z);
|
||
|
|
const float velMultiplier0 = next_velMultiplier;
|
||
|
|
const float4 rbXn_targetVelW0 = next_rbXn_targetVelW;
|
||
|
|
const float appliedForce0 = next_appliedForce;
|
||
|
|
const float targetVel0 = rbXn_targetVelW0.w;
|
||
|
|
|
||
|
|
const PxVec3 rbXn0 = PxVec3(rbXn_targetVelW0.x, rbXn_targetVelW0.y, rbXn_targetVelW0.z);
|
||
|
|
|
||
|
|
const PxReal v00 = angVel0.dot(raXn0);
|
||
|
|
const PxReal v10 = angVel1.dot(rbXn0);
|
||
|
|
const float normalVel0 = v00 - v10;
|
||
|
|
|
||
|
|
const float tmp10 = appliedForce0 - (-targetVel0) * velMultiplier0;
|
||
|
|
const float totalImpulse = tmp10 - normalVel0 * velMultiplier0;
|
||
|
|
|
||
|
|
const bool clamp = PxAbs(totalImpulse) > (maxFrictionImpulse * frictionScale);
|
||
|
|
|
||
|
|
const PxReal totalClamped = PxClamp(totalImpulse, -maxDynFrictionImpulse * frictionScale, maxDynFrictionImpulse * frictionScale);
|
||
|
|
|
||
|
|
const PxReal newAppliedForce = clamp ? totalClamped : totalImpulse;
|
||
|
|
|
||
|
|
float deltaF = newAppliedForce - appliedForce0;
|
||
|
|
if (error)
|
||
|
|
error->accumulateErrorLocal(deltaF, velMultiplier0);
|
||
|
|
|
||
|
|
angVel0 += raXn0 * (deltaF * angDom0);
|
||
|
|
angVel1 -= rbXn0 * (deltaF * angDom1);
|
||
|
|
|
||
|
|
Pxstcs(&f0.appliedForce[threadIndex], newAppliedForce);
|
||
|
|
broken = broken | clamp;
|
||
|
|
}
|
||
|
|
|
||
|
|
Pxstcs(&frictionHeader->broken[threadIndex], broken);
|
||
|
|
}
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
// Write back
|
||
|
|
b0LinVel = PxVec3(linVel0.x, linVel0.y, linVel0.z);
|
||
|
|
b0AngVel = PxVec3(angVel0.x, angVel0.y, angVel0.z);
|
||
|
|
b1LinVel = PxVec3(linVel1.x, linVel1.y, linVel1.z);
|
||
|
|
b1AngVel = PxVec3(angVel1.x, angVel1.y, angVel1.z);
|
||
|
|
}
|
||
|
|
|
||
|
|
// A light version of the function "solveContactBlockTGS" to quickly check if there is any active contact.
|
||
|
|
// TODO: Make this even lighter.
|
||
|
|
|
||
|
|
static __device__ bool checkActiveContactBlockTGS(const PxgBlockConstraintBatch& batch, const PxVec3& linVel0, const PxVec3& angVel0,
|
||
|
|
const PxVec3& linVel1, const PxVec3& angVel1, const PxVec3& b0LinDelta, const PxVec3& b0AngDelta, const PxVec3& b1LinDelta,
|
||
|
|
const PxVec3& b1AngDelta, const PxU32 threadIndex, const PxgTGSBlockSolverContactHeader* PX_RESTRICT contactHeaders,
|
||
|
|
PxgTGSBlockSolverContactPoint* PX_RESTRICT contactPoints, const PxReal elapsedTime, const PxReal minPen)
|
||
|
|
{
|
||
|
|
using namespace physx;
|
||
|
|
|
||
|
|
{
|
||
|
|
const PxgTGSBlockSolverContactHeader* PX_RESTRICT contactHeader = &contactHeaders[batch.mConstraintBatchIndex];
|
||
|
|
const uint numNormalConstr = Pxldcs(contactHeader->numNormalConstr[threadIndex]);
|
||
|
|
|
||
|
|
if (numNormalConstr)
|
||
|
|
{
|
||
|
|
const PxReal maxPenBias = Pxldcs(contactHeader->maxPenBias[threadIndex]);
|
||
|
|
|
||
|
|
PxgTGSBlockSolverContactPoint* PX_RESTRICT contacts = &contactPoints[batch.startConstraintIndex];
|
||
|
|
const float4 invMass0_1_angDom0_1 = Pxldcs(contactHeader->invMass0_1_angDom0_1[threadIndex]);
|
||
|
|
|
||
|
|
const float invMassA = invMass0_1_angDom0_1.x;
|
||
|
|
const float invMassB = invMass0_1_angDom0_1.y;
|
||
|
|
|
||
|
|
const float4 normal_staticFriction = Pxldcg(contactHeader->normal_staticFriction[threadIndex]);
|
||
|
|
const PxVec3 normal = PxVec3(normal_staticFriction.x, normal_staticFriction.y, normal_staticFriction.z);
|
||
|
|
|
||
|
|
const float restitutionXdt = contactHeader->restitutionXdt[threadIndex];
|
||
|
|
const PxU8 flags = contactHeader->flags[threadIndex];
|
||
|
|
const PxVec3 delLinVel0 = normal * invMassA;
|
||
|
|
const PxVec3 delLinVel1 = normal * invMassB;
|
||
|
|
|
||
|
|
//Bring forward a read event
|
||
|
|
const PxVec3 relMotion = b0LinDelta - b1LinDelta;
|
||
|
|
const float deltaV = normal.dot(relMotion);
|
||
|
|
float relVel1 = (linVel0 - linVel1).dot(normal);
|
||
|
|
|
||
|
|
float4 next_raXn_extraCoeff = Pxldcs(contacts->raXn_extraCoeff[threadIndex]);
|
||
|
|
float4 next_rbXn_targetVelW = Pxldcs(contacts->rbXn_targetVelW[threadIndex]);
|
||
|
|
float next_appliedForce = Pxldcs(contacts->appliedForce[threadIndex]);
|
||
|
|
float next_error = Pxldcs(contacts->separation[threadIndex]);
|
||
|
|
float next_maxImpulse = Pxldcs(contacts->maxImpulse[threadIndex]);
|
||
|
|
float next_biasCoefficient = Pxldcs(contacts->biasCoefficient[threadIndex]);
|
||
|
|
|
||
|
|
float next_resp0 = Pxldcs(contacts->resp0[threadIndex]);
|
||
|
|
float next_resp1 = Pxldcs(contacts->resp1[threadIndex]);
|
||
|
|
|
||
|
|
float next_unitResponse = next_resp0 + next_resp1;
|
||
|
|
float next_recipResponse = (next_unitResponse > 0.f) ? (1.f / next_unitResponse) : 0.f;
|
||
|
|
float next_velMultiplier = next_recipResponse;
|
||
|
|
|
||
|
|
if (restitutionXdt < 0.f)
|
||
|
|
{
|
||
|
|
computeCompliantContactCoefficientsTGS(flags, restitutionXdt, next_unitResponse,
|
||
|
|
next_recipResponse, next_raXn_extraCoeff.w, next_velMultiplier,
|
||
|
|
next_biasCoefficient);
|
||
|
|
}
|
||
|
|
|
||
|
|
{
|
||
|
|
for (uint i = 0; i < numNormalConstr; i++)
|
||
|
|
{
|
||
|
|
const float4 raXn_contactCoeff = next_raXn_extraCoeff;
|
||
|
|
const float velMultiplier = next_velMultiplier;
|
||
|
|
const float4 rbXn_targetVelW = next_rbXn_targetVelW;
|
||
|
|
const float appliedForce = next_appliedForce;
|
||
|
|
const float separation = next_error;
|
||
|
|
const float maxImpulse = next_maxImpulse;
|
||
|
|
const float biasCoefficient = next_biasCoefficient;
|
||
|
|
const float recipResponse = next_recipResponse;
|
||
|
|
|
||
|
|
if ((i + 1) < numNormalConstr)
|
||
|
|
{
|
||
|
|
const PxgTGSBlockSolverContactPoint& nextC = contacts[i + 1];
|
||
|
|
next_raXn_extraCoeff = Pxldcs(nextC.raXn_extraCoeff[threadIndex]);
|
||
|
|
next_rbXn_targetVelW = Pxldcs(nextC.rbXn_targetVelW[threadIndex]);
|
||
|
|
next_appliedForce = Pxldcs(nextC.appliedForce[threadIndex]);
|
||
|
|
next_error = Pxldcs(nextC.separation[threadIndex]);
|
||
|
|
next_maxImpulse = Pxldcs(nextC.maxImpulse[threadIndex]);
|
||
|
|
next_biasCoefficient = Pxldcs(nextC.biasCoefficient[threadIndex]);
|
||
|
|
|
||
|
|
next_resp0 = Pxldcs(nextC.resp0[threadIndex]);
|
||
|
|
next_resp1 = Pxldcs(nextC.resp1[threadIndex]);
|
||
|
|
|
||
|
|
next_unitResponse = next_resp0 + next_resp1;
|
||
|
|
next_recipResponse = (next_unitResponse > 0.f) ? (1.f / next_unitResponse) : 0.f;
|
||
|
|
|
||
|
|
next_velMultiplier = next_recipResponse;
|
||
|
|
|
||
|
|
if (restitutionXdt < 0.f)
|
||
|
|
{
|
||
|
|
computeCompliantContactCoefficientsTGS(flags, restitutionXdt, next_unitResponse,
|
||
|
|
next_recipResponse, next_raXn_extraCoeff.w, next_velMultiplier,
|
||
|
|
next_biasCoefficient);
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
const PxVec3 raXn = PxVec3(raXn_contactCoeff.x, raXn_contactCoeff.y, raXn_contactCoeff.z);
|
||
|
|
const PxVec3 rbXn = PxVec3(rbXn_targetVelW.x, rbXn_targetVelW.y, rbXn_targetVelW.z);
|
||
|
|
const float targetVel = rbXn_targetVelW.w;
|
||
|
|
|
||
|
|
//Compute the normal velocity of the constraint.
|
||
|
|
const PxReal v0 = angVel0.dot(raXn);
|
||
|
|
const PxReal v1 = angVel1.dot(rbXn);
|
||
|
|
const float normalVel = relVel1 + (v0 - v1);
|
||
|
|
|
||
|
|
const float sep = PxMax(minPen, separation + deltaV + b0AngDelta.dot(raXn) - b1AngDelta.dot(rbXn));
|
||
|
|
|
||
|
|
//How much the target vel should have changed the position of this constraint...
|
||
|
|
const PxReal tVelErr = targetVel * elapsedTime;
|
||
|
|
const PxReal biasedErr = recipResponse * fminf(-maxPenBias, biasCoefficient * (sep - tVelErr));
|
||
|
|
|
||
|
|
//KS - clamp the maximum force
|
||
|
|
const float tempDeltaF = biasedErr - (normalVel - targetVel) * velMultiplier;
|
||
|
|
const float _deltaF = fmaxf(tempDeltaF, -appliedForce);//FMax(FNegScaleSub(normalVel, velMultiplier, biasedErr), FNeg(appliedForce));
|
||
|
|
const float _newForce = appliedForce + _deltaF;
|
||
|
|
const float newForce = fminf(_newForce, maxImpulse);//FMin(_newForce, maxImpulse);
|
||
|
|
const float deltaF = newForce - appliedForce;
|
||
|
|
|
||
|
|
// active contact
|
||
|
|
if (PxAbs(deltaF) > 1.0e-8f)
|
||
|
|
{
|
||
|
|
return true;
|
||
|
|
}
|
||
|
|
}
|
||
|
|
}
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
return false;
|
||
|
|
}
|
||
|
|
|
||
|
|
static __device__ void concludeContactBlockTGS(const PxgBlockConstraintBatch& batch, const PxU32 threadIndex, PxgTGSBlockSolverContactHeader* contactHeaders, PxgTGSBlockSolverFrictionHeader* frictionHeaders,
|
||
|
|
PxgTGSBlockSolverContactPoint* contactPoints, PxgTGSBlockSolverContactFriction* frictions)
|
||
|
|
{
|
||
|
|
|
||
|
|
#if 0
|
||
|
|
using namespace physx;
|
||
|
|
|
||
|
|
{
|
||
|
|
const PxgTGSBlockSolverContactHeader* contactHeader = &contactHeaders[batch.mConstraintBatchIndex];
|
||
|
|
PxgTGSBlockSolverFrictionHeader* frictionHeader = &frictionHeaders[batch.mConstraintBatchIndex];
|
||
|
|
|
||
|
|
const uint32_t numNormalConstr = contactHeader->numNormalConstr[threadIndex];
|
||
|
|
//const uint32_t numFrictionConstr = frictionHeader->numFrictionConstr[threadIndex];
|
||
|
|
|
||
|
|
PxgTGSBlockSolverContactPoint* contacts = &contactPoints[batch.startConstraintIndex];
|
||
|
|
for(uint32_t i=0;i<numNormalConstr;i++)
|
||
|
|
{
|
||
|
|
//contactPoints[i].setScaledBias(fmaxf(contactPoints[i].getScaledBias(), 0.f));
|
||
|
|
contacts[i].biasCoefficeint[threadIndex] = 0.f;
|
||
|
|
}
|
||
|
|
|
||
|
|
frictionHeader->biasCoefficient[threadIndex] = 0.f;
|
||
|
|
|
||
|
|
//PxgTGSBlockSolverContactFriction* frictionConstr = &frictions[batch.startFrictionIndex];
|
||
|
|
//for(uint32_t i=0;i<numFrictionConstr;i++)
|
||
|
|
//{
|
||
|
|
// //frictionConstr[i].setBias(0.f);
|
||
|
|
// frictionConstr[i].bias[threadIndex] = 0.f;
|
||
|
|
//}
|
||
|
|
}
|
||
|
|
#endif
|
||
|
|
}
|
||
|
|
|
||
|
|
|
||
|
|
|
||
|
|
static __device__ void writeBackContactBlockTGS(const PxgBlockConstraintBatch& batch, const PxU32 threadIndex,
|
||
|
|
const PxgSolverBodyData* bodies, Dy::ThresholdStreamElement* thresholdStream,
|
||
|
|
PxI32* sharedThresholdStreamIndex, PxgTGSBlockSolverContactHeader* contactHeaders, PxgTGSBlockSolverFrictionHeader* frictionHeaders,
|
||
|
|
PxgTGSBlockSolverContactPoint* contactPoints, PxgTGSBlockSolverContactFriction* frictions,
|
||
|
|
PxF32* forcewritebackBuffer, PxgBlockFrictionPatch& frictionPatchBlock,
|
||
|
|
PxgFrictionPatchGPU* frictionPatches)
|
||
|
|
{
|
||
|
|
const PxU32 bodyAIndex = batch.bodyAIndex[threadIndex];
|
||
|
|
const PxU32 bodyBIndex = batch.bodyBIndex[threadIndex];
|
||
|
|
|
||
|
|
const PxgSolverBodyData& bd0 = bodies[bodyAIndex];
|
||
|
|
const PxgSolverBodyData& bd1 = bodies[bodyBIndex];
|
||
|
|
bool forceThreshold = false;
|
||
|
|
|
||
|
|
float normalForce = 0.f;
|
||
|
|
|
||
|
|
{
|
||
|
|
const PxgTGSBlockSolverContactHeader* PX_RESTRICT contactHeader = &contactHeaders[batch.mConstraintBatchIndex];
|
||
|
|
const PxgTGSBlockSolverFrictionHeader* PX_RESTRICT frictionHeader = &frictionHeaders[batch.mConstraintBatchIndex];
|
||
|
|
|
||
|
|
PxU32 forceWritebackOffset = contactHeader->forceWritebackOffset[threadIndex];
|
||
|
|
|
||
|
|
forceThreshold = contactHeader->flags[threadIndex] & PxgSolverContactFlags::eHAS_FORCE_THRESHOLDS;
|
||
|
|
|
||
|
|
const PxU32 numFrictionConstr = frictionHeader->numFrictionConstr[threadIndex];
|
||
|
|
|
||
|
|
const PxU32 numNormalConstr = contactHeader->numNormalConstr[threadIndex];
|
||
|
|
if(forceWritebackOffset!=0xFFFFFFFF)
|
||
|
|
{
|
||
|
|
PxReal* vForceWriteback = &forcewritebackBuffer[forceWritebackOffset];
|
||
|
|
PxgTGSBlockSolverContactPoint* c = &contactPoints[batch.startConstraintIndex];
|
||
|
|
for(PxU32 i=0; i<numNormalConstr; i++)
|
||
|
|
{
|
||
|
|
const PxReal appliedForce = c[i].appliedForce[threadIndex];//FStore(c->getAppliedForce());
|
||
|
|
*vForceWriteback++ = appliedForce;
|
||
|
|
normalForce += appliedForce;
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
writeBackContactBlockFriction(threadIndex, numFrictionConstr, frictionHeader,
|
||
|
|
frictionPatchBlock, frictions + batch.startFrictionIndex, frictionPatches);
|
||
|
|
|
||
|
|
if(numFrictionConstr && frictionHeader->broken[threadIndex])
|
||
|
|
{
|
||
|
|
frictionPatchBlock.broken[threadIndex] = 1;
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
float reportThreshold0 = bd0.reportThreshold;
|
||
|
|
float reportThreshold1 = bd1.reportThreshold;
|
||
|
|
|
||
|
|
if((forceThreshold && normalForce !=0 && (reportThreshold0 < PX_MAX_REAL || reportThreshold1 < PX_MAX_REAL)))
|
||
|
|
{
|
||
|
|
//ToDo : support PxgThresholdStreamElement
|
||
|
|
Dy::ThresholdStreamElement elt;
|
||
|
|
elt.normalForce = normalForce;
|
||
|
|
elt.threshold = PxMin<float>(reportThreshold0, reportThreshold1);
|
||
|
|
|
||
|
|
elt.nodeIndexA = bd0.islandNodeIndex;
|
||
|
|
elt.nodeIndexB = bd1.islandNodeIndex;
|
||
|
|
elt.shapeInteraction = batch.shapeInteraction[threadIndex];
|
||
|
|
PxOrder(elt.nodeIndexA, elt.nodeIndexB);
|
||
|
|
assert(elt.nodeIndexA < elt.nodeIndexB);
|
||
|
|
|
||
|
|
PxI32 index = atomicAdd(sharedThresholdStreamIndex, 1);
|
||
|
|
|
||
|
|
//KS - force a 16-byte coalesced write
|
||
|
|
//((float4*)thresholdStream)[index] = *((float4*)&elt);
|
||
|
|
thresholdStream[index] = elt;
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
// Using the same logic as the previous implementation, but mass-splitting is additionally performed per sub-timestep.
|
||
|
|
// Required data is packed and stored differently from the previous implementation to split mass at each sub-timestep;
|
||
|
|
// i.e., mass-related terms are computed at every sub-timestep.
|
||
|
|
// Refer to "Mass Splitting for Jitter-Free Parallel Rigid Body Simulation" for the general mass-splitting idea.
|
||
|
|
|
||
|
|
|
||
|
|
static __device__ void solve1DBlockTGS(const PxgBlockConstraintBatch& batch, PxVec3& b0LinVel, PxVec3& b0AngVel, PxVec3& b1LinVel, PxVec3& b1AngVel,
|
||
|
|
const PxVec3& linDelta0, const PxVec3& angDelta0, const PxVec3& linDelta1, const PxVec3& angDelta1, const PxU32 threadIndex,
|
||
|
|
const PxgTGSBlockSolverConstraint1DHeader* PX_RESTRICT headers, PxgTGSBlockSolverConstraint1DCon* PX_RESTRICT rowsCon,
|
||
|
|
const PxgSolverTxIData& iData0, const PxgSolverTxIData& iData1, const PxReal elapsedTime, bool residualReportingEnabled,
|
||
|
|
PxReal ref0 = 1.f, PxReal ref1 = 1.f)
|
||
|
|
{
|
||
|
|
using namespace physx;
|
||
|
|
|
||
|
|
//
|
||
|
|
// please refer to solve1DStep() in DyTGSContactPrep.cpp for a description of the parameters and some of the logic
|
||
|
|
//
|
||
|
|
|
||
|
|
const PxgTGSBlockSolverConstraint1DHeader* PX_RESTRICT header = &headers[batch.mConstraintBatchIndex];
|
||
|
|
PxgTGSBlockSolverConstraint1DCon* PX_RESTRICT baseCon = &rowsCon[batch.startConstraintIndex];
|
||
|
|
|
||
|
|
PxVec3 linVel0(b0LinVel.x, b0LinVel.y, b0LinVel.z);
|
||
|
|
PxVec3 linVel1(b1LinVel.x, b1LinVel.y, b1LinVel.z);
|
||
|
|
PxVec3 angVel0(b0AngVel.x, b0AngVel.y, b0AngVel.z);
|
||
|
|
PxVec3 angVel1(b1AngVel.x, b1AngVel.y, b1AngVel.z);
|
||
|
|
|
||
|
|
const float4 raInvMass0 = header->rAWorld_invMass0D0[threadIndex];
|
||
|
|
const float4 rbInvMass1 = header->rBWorld_invMass1D1[threadIndex];
|
||
|
|
|
||
|
|
float invMass0 = ref0 * raInvMass0.w; //FLoad(header->invMass0D0);
|
||
|
|
float invMass1 = ref1 * rbInvMass1.w; //FLoad(header->invMass1D1);
|
||
|
|
|
||
|
|
const PxVec3 raPrev(raInvMass0.x, raInvMass0.y, raInvMass0.z);
|
||
|
|
const PxVec3 rbPrev(rbInvMass1.x, rbInvMass1.y, rbInvMass1.z);
|
||
|
|
|
||
|
|
const PxVec3 ra = iData0.deltaBody2World.q.rotate(raPrev);
|
||
|
|
const PxVec3 rb = iData1.deltaBody2World.q.rotate(rbPrev);
|
||
|
|
|
||
|
|
const PxVec3 raMotion = (ra + linDelta0) - raPrev;
|
||
|
|
const PxVec3 rbMotion = (rb + linDelta1) - rbPrev;
|
||
|
|
|
||
|
|
float invInertiaScale0 = ref0 * header->invInertiaScale0[threadIndex];
|
||
|
|
float invInertiaScale1 = ref1 * header->invInertiaScale1[threadIndex];
|
||
|
|
|
||
|
|
|
||
|
|
const uchar4 rowCounts_breakable_orthoAxisCount = header->rowCounts_breakable_orthoAxisCount[threadIndex];
|
||
|
|
|
||
|
|
const PxU32 rowCount = rowCounts_breakable_orthoAxisCount.x;
|
||
|
|
|
||
|
|
const float4 ang0Ortho0_recipResponseW = header->angOrthoAxis0_recipResponseW[0][threadIndex];
|
||
|
|
const float4 ang0Ortho1_recipResponseW = header->angOrthoAxis0_recipResponseW[1][threadIndex];
|
||
|
|
const float4 ang0Ortho2_recipResponseW = header->angOrthoAxis0_recipResponseW[2][threadIndex];
|
||
|
|
|
||
|
|
const float4 ang1OrthoAxis0_ErrorW = header->angOrthoAxis1_ErrorW[0][threadIndex];
|
||
|
|
const float4 ang1OrthoAxis1_ErrorW = header->angOrthoAxis1_ErrorW[1][threadIndex];
|
||
|
|
const float4 ang1OrthoAxis2_ErrorW = header->angOrthoAxis1_ErrorW[2][threadIndex];
|
||
|
|
|
||
|
|
const float recipResponse0 = ang0Ortho0_recipResponseW.w;
|
||
|
|
const float recipResponse1 = ang0Ortho1_recipResponseW.w;
|
||
|
|
const float recipResponse2 = ang0Ortho2_recipResponseW.w;
|
||
|
|
|
||
|
|
const PxVec3 ang0Ortho0(ang0Ortho0_recipResponseW.x, ang0Ortho0_recipResponseW.y, ang0Ortho0_recipResponseW.z);
|
||
|
|
const PxVec3 ang0Ortho1(ang0Ortho1_recipResponseW.x, ang0Ortho1_recipResponseW.y, ang0Ortho1_recipResponseW.z);
|
||
|
|
const PxVec3 ang0Ortho2(ang0Ortho2_recipResponseW.x, ang0Ortho2_recipResponseW.y, ang0Ortho2_recipResponseW.z);
|
||
|
|
|
||
|
|
const PxVec3 ang1Ortho0(ang1OrthoAxis0_ErrorW.x, ang1OrthoAxis0_ErrorW.y, ang1OrthoAxis0_ErrorW.z);
|
||
|
|
const PxVec3 ang1Ortho1(ang1OrthoAxis1_ErrorW.x, ang1OrthoAxis1_ErrorW.y, ang1OrthoAxis1_ErrorW.z);
|
||
|
|
const PxVec3 ang1Ortho2(ang1OrthoAxis2_ErrorW.x, ang1OrthoAxis2_ErrorW.y, ang1OrthoAxis2_ErrorW.z);
|
||
|
|
|
||
|
|
PxReal error0 = ang1OrthoAxis0_ErrorW.w + (ang0Ortho0.dot(angDelta0) - ang1Ortho0.dot(angDelta1));
|
||
|
|
PxReal error1 = ang1OrthoAxis1_ErrorW.w + (ang0Ortho1.dot(angDelta0) - ang1Ortho1.dot(angDelta1));
|
||
|
|
PxReal error2 = ang1OrthoAxis2_ErrorW.w + (ang0Ortho2.dot(angDelta0) - ang1Ortho2.dot(angDelta1));
|
||
|
|
|
||
|
|
for (PxU32 i = 0; i < rowCount; ++i)
|
||
|
|
{
|
||
|
|
PxgTGSBlockSolverConstraint1DCon& ccon = baseCon[i];
|
||
|
|
|
||
|
|
const float4 _clinVel0_coeff0W = ccon.lin0XYZ_initBiasOrCoeff0[threadIndex];
|
||
|
|
const float4 _clinVel1_coeff1W = ccon.lin1XYZ_biasScaleOrCoeff1[threadIndex];
|
||
|
|
const float4 _cangVel0_coeff2W = ccon.ang0XYZ_velMultiplierOrCoeff2[threadIndex];
|
||
|
|
const float4 _cangVel1_coeff3W = ccon.ang1XYZ_velTargetOrCoeff3[threadIndex];
|
||
|
|
|
||
|
|
const PxVec3 clinVel0(_clinVel0_coeff0W.x, _clinVel0_coeff0W.y, _clinVel0_coeff0W.z);
|
||
|
|
const PxVec3 clinVel1(_clinVel1_coeff1W.x, _clinVel1_coeff1W.y, _clinVel1_coeff1W.z);
|
||
|
|
const PxVec3 cangVel0_(_cangVel0_coeff2W.x, _cangVel0_coeff2W.y, _cangVel0_coeff2W.z);
|
||
|
|
const PxVec3 cangVel1_(_cangVel1_coeff3W.x, _cangVel1_coeff3W.y, _cangVel1_coeff3W.z);
|
||
|
|
|
||
|
|
PxReal initBias = _clinVel0_coeff0W.w;
|
||
|
|
const PxReal biasScale = _clinVel1_coeff1W.w;
|
||
|
|
const PxReal velMultiplier = _cangVel0_coeff2W.w;
|
||
|
|
const PxReal targetVel = _cangVel1_coeff3W.w;
|
||
|
|
|
||
|
|
const PxVec3 cangVel0 = cangVel0_ + ra.cross(clinVel0);
|
||
|
|
const PxVec3 cangVel1 = cangVel1_ + rb.cross(clinVel1);
|
||
|
|
|
||
|
|
const PxU32 flags = ccon.flags[threadIndex];
|
||
|
|
|
||
|
|
const PxReal maxBias = ccon.maxBias[threadIndex];
|
||
|
|
|
||
|
|
const PxReal minBias = Dy::computeMinBiasTGS(flags, maxBias);
|
||
|
|
|
||
|
|
PxVec3 raXnI = iData0.sqrtInvInertia * cangVel0;
|
||
|
|
PxVec3 rbXnI = iData1.sqrtInvInertia * cangVel1;
|
||
|
|
|
||
|
|
//KS - TODO - orthogonalization here...
|
||
|
|
if (flags & DY_SC_FLAG_ORTHO_TARGET)
|
||
|
|
{
|
||
|
|
const PxReal proj0 = (raXnI.dot(ang0Ortho0) +
|
||
|
|
rbXnI.dot(ang1Ortho0)) * recipResponse0;
|
||
|
|
|
||
|
|
const PxReal proj1 = (raXnI.dot(ang0Ortho1) +
|
||
|
|
rbXnI.dot(ang1Ortho1)) * recipResponse1;
|
||
|
|
|
||
|
|
const PxReal proj2 = (raXnI.dot(ang0Ortho2) +
|
||
|
|
rbXnI.dot(ang1Ortho2)) * recipResponse2;
|
||
|
|
|
||
|
|
const PxVec3 delta0 = ang0Ortho0 * proj0 + ang0Ortho1 * proj1 + ang0Ortho2 * proj2;
|
||
|
|
const PxVec3 delta1 = ang1Ortho0 * proj0 + ang1Ortho1 * proj1 + ang1Ortho2 * proj2;
|
||
|
|
|
||
|
|
raXnI = raXnI - delta0;
|
||
|
|
rbXnI = rbXnI - delta1;
|
||
|
|
|
||
|
|
const PxReal orthoBasisError = biasScale * (error0 * proj0 + error1 * proj1 + error2 * proj2);
|
||
|
|
initBias = initBias - orthoBasisError;
|
||
|
|
}
|
||
|
|
|
||
|
|
// If biasScale is zero, initBias must also be zero.
|
||
|
|
// biasScale is enforced to zero before the velocity itertaion (e.g., in conclude1DBlockTGS), but not initBias.
|
||
|
|
// Thus, explicitly resetting it to zero here for velocity iteration.
|
||
|
|
initBias = (biasScale == 0.f) ? 0.f : initBias;
|
||
|
|
|
||
|
|
const bool isSpringConstraint = (flags & DY_SC_FLAG_SPRING);
|
||
|
|
|
||
|
|
const PxReal errorDelta = Dy::computeResolvedGeometricErrorTGS(raMotion, rbMotion, clinVel0, clinVel1,
|
||
|
|
angDelta0, angDelta1, raXnI, rbXnI,
|
||
|
|
ccon.angularErrorScale[threadIndex],
|
||
|
|
isSpringConstraint, targetVel, elapsedTime);
|
||
|
|
|
||
|
|
const PxReal resp0 = invMass0 * clinVel0.dot(clinVel0) + raXnI.dot(raXnI) * invInertiaScale0;
|
||
|
|
const PxReal resp1 = invMass1 * clinVel1.dot(clinVel1) + rbXnI.dot(rbXnI) * invInertiaScale1;
|
||
|
|
|
||
|
|
//KS - may be a - required here...
|
||
|
|
const PxReal response = resp0 + resp1;
|
||
|
|
|
||
|
|
const PxReal recipResponse = response > 0.f ? 1.f / response : 0.f;
|
||
|
|
|
||
|
|
const PxReal vMul = isSpringConstraint ? velMultiplier : recipResponse * velMultiplier;
|
||
|
|
|
||
|
|
const PxReal unclampedBias = initBias + errorDelta * biasScale;
|
||
|
|
|
||
|
|
const PxReal bias = PxClamp(unclampedBias, minBias, maxBias);
|
||
|
|
|
||
|
|
const PxReal constant = isSpringConstraint ? bias + targetVel : recipResponse * (bias + targetVel);
|
||
|
|
|
||
|
|
const float appliedForce = ccon.appliedForce[threadIndex];//FLoad(c.appliedForce);
|
||
|
|
//const FloatV targetVel = FLoad(c.targetVelocity);
|
||
|
|
|
||
|
|
const float maxImpulse = ccon.maxImpulse[threadIndex];//FLoad(c.maxImpulse);
|
||
|
|
const float minImpulse = ccon.minImpulse[threadIndex];//FLoad(c.minImpulse);
|
||
|
|
|
||
|
|
//const PxVec3 v0 = linVel0.multiply(clinVel0) + angVel0.multiply(cangVel0);//V3MulAdd(linVel0, clinVel0, V3Mul(angVel0, cangVel0));
|
||
|
|
//const PxVec3 v1 = linVel1.multiply(clinVel1) + angVel1.multiply(cangVel1);//V3MulAdd(linVel1, clinVel1, V3Mul(angVel1, cangVel1));
|
||
|
|
|
||
|
|
const float v0 = linVel0.dot(clinVel0) + angVel0.dot(raXnI);//V3MulAdd(linVel0, clinVel0, V3Mul(angVel0, cangVel0));
|
||
|
|
const float v1 = linVel1.dot(clinVel1) + angVel1.dot(rbXnI);//V3MulAdd(linVel1, clinVel1, V3Mul(angVel1, cangVel1));
|
||
|
|
|
||
|
|
const float normalVel = v0 - v1;
|
||
|
|
|
||
|
|
const float unclampedForce = appliedForce + (vMul * normalVel + constant);//FMulAdd(iMul, appliedForce, FMulAdd(vMul, normalVel, constant));
|
||
|
|
const float clampedForce = fminf(maxImpulse, fmaxf(minImpulse, unclampedForce));//FMin(maxImpulse, (FMax(minImpulse, unclampedForce)));
|
||
|
|
const float deltaF = clampedForce - appliedForce;//FSub(clampedForce, appliedForce);
|
||
|
|
|
||
|
|
ccon.appliedForce[threadIndex] = clampedForce; // FStore(clampedForce);
|
||
|
|
if(residualReportingEnabled)
|
||
|
|
ccon.residual[threadIndex] = PxgErrorAccumulator::calculateResidual(deltaF, vMul);
|
||
|
|
|
||
|
|
linVel0 = linVel0 + clinVel0 * (deltaF * invMass0);//V3ScaleAdd(clinVel0, FMul(deltaF, invMass0), linVel0);
|
||
|
|
linVel1 = linVel1 - clinVel1 * (deltaF * invMass1);//V3NegScaleSub(clinVel1, FMul(deltaF, invMass1), linVel1);
|
||
|
|
angVel0 = angVel0 + raXnI * deltaF * invInertiaScale0;//V3ScaleAdd(cangVel0, deltaF, angVel0);
|
||
|
|
angVel1 = angVel1 - rbXnI * deltaF * invInertiaScale1;//V3NegScaleSub(cangVel1, deltaF, angVel1);
|
||
|
|
|
||
|
|
}
|
||
|
|
|
||
|
|
|
||
|
|
b0LinVel = PxVec3(linVel0.x, linVel0.y, linVel0.z);
|
||
|
|
b0AngVel = PxVec3(angVel0.x, angVel0.y, angVel0.z);
|
||
|
|
b1LinVel = PxVec3(linVel1.x, linVel1.y, linVel1.z);
|
||
|
|
b1AngVel = PxVec3(angVel1.x, angVel1.y, angVel1.z);
|
||
|
|
}
|
||
|
|
|
||
|
|
// Using the same logic as the previous implementation, but mass-splitting is additionally performed per sub-timestep.
|
||
|
|
// Required data is packed and stored differently from the previous implementation to split mass at each sub-timestep;
|
||
|
|
// i.e., mass-related terms are computed at every sub-timestep.
|
||
|
|
|
||
|
|
static __device__ PX_FORCE_INLINE void solveExt1DBlockTGS(const PxgBlockConstraintBatch& batch,
|
||
|
|
Cm::UnAlignedSpatialVector& vel0,
|
||
|
|
Cm::UnAlignedSpatialVector& vel1,
|
||
|
|
const Cm::UnAlignedSpatialVector& motion0,
|
||
|
|
const Cm::UnAlignedSpatialVector& motion1,
|
||
|
|
const PxU32 threadIndex,
|
||
|
|
const PxgTGSBlockSolverConstraint1DHeader* PX_RESTRICT headers,
|
||
|
|
PxgTGSBlockSolverConstraint1DCon* PX_RESTRICT rowsCon,
|
||
|
|
PxgArticulationBlockResponse* PX_RESTRICT artiResponse,
|
||
|
|
const PxQuat& deltaQ0, const PxQuat& deltaQ1,
|
||
|
|
const PxReal elapsedTime,
|
||
|
|
Cm::UnAlignedSpatialVector& impluse0,
|
||
|
|
Cm::UnAlignedSpatialVector& impluse1,
|
||
|
|
bool residualReportingEnabled,
|
||
|
|
PxReal ref0 = 1.f, PxReal ref1 = 1.f)
|
||
|
|
{
|
||
|
|
using namespace physx;
|
||
|
|
|
||
|
|
const PxgTGSBlockSolverConstraint1DHeader* PX_RESTRICT header = &headers[batch.mConstraintBatchIndex];
|
||
|
|
PxgTGSBlockSolverConstraint1DCon* PX_RESTRICT baseCon = &rowsCon[batch.startConstraintIndex];
|
||
|
|
const PxgArticulationBlockResponse* PX_RESTRICT responses = &artiResponse[batch.mArticulationResponseIndex];
|
||
|
|
|
||
|
|
PxVec3 linVel0 = vel0.bottom;
|
||
|
|
PxVec3 linVel1 = vel1.bottom;
|
||
|
|
PxVec3 angVel0 = vel0.top;
|
||
|
|
PxVec3 angVel1 = vel1.top;
|
||
|
|
|
||
|
|
const float4 raInvMass0 = header->rAWorld_invMass0D0[threadIndex];
|
||
|
|
const float4 rbInvMass1 = header->rBWorld_invMass1D1[threadIndex];
|
||
|
|
|
||
|
|
float invMass0 = ref0 * raInvMass0.w; //FLoad(header->invMass0D0);
|
||
|
|
float invMass1 = ref1 * rbInvMass1.w; //FLoad(header->invMass1D1);
|
||
|
|
|
||
|
|
float invInertiaScale0 = ref0 * header->invInertiaScale0[threadIndex];
|
||
|
|
float invInertiaScale1 = ref1 * header->invInertiaScale1[threadIndex];
|
||
|
|
|
||
|
|
const PxVec3 raPrev(raInvMass0.x, raInvMass0.y, raInvMass0.z);
|
||
|
|
const PxVec3 rbPrev(rbInvMass1.x, rbInvMass1.y, rbInvMass1.z);
|
||
|
|
|
||
|
|
const PxVec3 ra = deltaQ0.rotate(raPrev);
|
||
|
|
const PxVec3 rb = deltaQ1.rotate(rbPrev);
|
||
|
|
|
||
|
|
const PxVec3 raMotion = (ra + motion0.bottom) - raPrev;
|
||
|
|
const PxVec3 rbMotion = (rb + motion1.bottom) - rbPrev;
|
||
|
|
|
||
|
|
const uchar4 rowCounts_breakable_orthoAxisCount = header->rowCounts_breakable_orthoAxisCount[threadIndex];
|
||
|
|
|
||
|
|
const PxU32 rowCount = rowCounts_breakable_orthoAxisCount.x;
|
||
|
|
|
||
|
|
const PxReal cfm = header->cfm[threadIndex];
|
||
|
|
|
||
|
|
PxVec3 li0(0.f, 0.f, 0.f);
|
||
|
|
PxVec3 li1(0.f, 0.f, 0.f);
|
||
|
|
PxVec3 ai0(0.f, 0.f, 0.f);
|
||
|
|
PxVec3 ai1(0.f, 0.f, 0.f);
|
||
|
|
|
||
|
|
for (PxU32 i = 0; i < rowCount; ++i)
|
||
|
|
{
|
||
|
|
PxgTGSBlockSolverConstraint1DCon& ccon = baseCon[i];
|
||
|
|
const PxgArticulationBlockResponse& response = responses[i];
|
||
|
|
|
||
|
|
const float4 _clinVel0_coeff0W = ccon.lin0XYZ_initBiasOrCoeff0[threadIndex];//V3LoadA(c.lin0);
|
||
|
|
const float4 _clinVel1_coeff1W = ccon.lin1XYZ_biasScaleOrCoeff1[threadIndex];//V3LoadA(c.lin1);
|
||
|
|
const float4 _cangVel0_coeff2W = ccon.ang0XYZ_velMultiplierOrCoeff2[threadIndex];//V3LoadA(c.ang0);
|
||
|
|
const float4 _cangVel1_coeff3W = ccon.ang1XYZ_velTargetOrCoeff3[threadIndex];//V3LoadA(c.ang1);
|
||
|
|
|
||
|
|
const PxVec3 clinVel0(_clinVel0_coeff0W.x, _clinVel0_coeff0W.y, _clinVel0_coeff0W.z);
|
||
|
|
const PxVec3 clinVel1(_clinVel1_coeff1W.x, _clinVel1_coeff1W.y, _clinVel1_coeff1W.z);
|
||
|
|
const PxVec3 cangVel0(_cangVel0_coeff2W.x, _cangVel0_coeff2W.y, _cangVel0_coeff2W.z);
|
||
|
|
const PxVec3 cangVel1(_cangVel1_coeff3W.x, _cangVel1_coeff3W.y, _cangVel1_coeff3W.z);
|
||
|
|
|
||
|
|
const PxU32 flags = ccon.flags[threadIndex];
|
||
|
|
const bool isSpringConstraint = (flags & DY_SC_FLAG_SPRING);
|
||
|
|
|
||
|
|
const PxReal resp0 = ref0 * ccon.resp0[threadIndex];
|
||
|
|
const PxReal resp1 = ref1 * ccon.resp1[threadIndex];
|
||
|
|
|
||
|
|
float unitResponse = resp0 + resp1;
|
||
|
|
unitResponse += cfm;
|
||
|
|
|
||
|
|
//https://omniverse-jirasw.nvidia.com/browse/PX-4383
|
||
|
|
const PxReal minRowResponse = DY_ARTICULATION_MIN_RESPONSE;
|
||
|
|
const PxReal recipResponse = Dy::computeRecipUnitResponse(unitResponse, minRowResponse);
|
||
|
|
|
||
|
|
const bool isAccelerationSpring = (flags & DY_SC_FLAG_ACCELERATION_SPRING);
|
||
|
|
|
||
|
|
const PxReal coeff0 = _clinVel0_coeff0W.w;
|
||
|
|
const PxReal coeff1 = _clinVel1_coeff1W.w;
|
||
|
|
const PxReal coeff2 = _cangVel0_coeff2W.w;
|
||
|
|
const PxReal coeff3 = _cangVel1_coeff3W.w;
|
||
|
|
const PxReal geometricError = ccon.geometricError[threadIndex];
|
||
|
|
|
||
|
|
PxReal biasScale, velMultiplier, initBias, targetVel;
|
||
|
|
Dy::compute1dConstraintSolverConstantsTGS(isSpringConstraint, isAccelerationSpring, geometricError,
|
||
|
|
unitResponse, recipResponse, coeff0, coeff1, coeff2,
|
||
|
|
coeff3, biasScale, velMultiplier, initBias, targetVel);
|
||
|
|
|
||
|
|
// If biasScale is zero, initBias must also be zero.
|
||
|
|
// biasScale is enforced to zero before the velocity itertaion (e.g., in conclude1DBlockTGS), but not initBias.
|
||
|
|
// Thus, explicitly resetting it to zero here for velocity iteration.
|
||
|
|
initBias = (biasScale == 0.f) ? 0.f : initBias;
|
||
|
|
|
||
|
|
const PxReal errorDelta = Dy::computeResolvedGeometricErrorTGS(raMotion, rbMotion, clinVel0, clinVel1,
|
||
|
|
motion0.top, motion1.top, cangVel0, cangVel1,
|
||
|
|
ccon.angularErrorScale[threadIndex],
|
||
|
|
isSpringConstraint, targetVel, elapsedTime);
|
||
|
|
|
||
|
|
|
||
|
|
const PxReal maxBias = ccon.maxBias[threadIndex];
|
||
|
|
|
||
|
|
const PxReal vMul = isSpringConstraint ? velMultiplier : recipResponse * velMultiplier;
|
||
|
|
const float appliedForce = ccon.appliedForce[threadIndex];
|
||
|
|
|
||
|
|
const PxReal unclampedBias = initBias + errorDelta * biasScale;
|
||
|
|
const PxReal minBias = Dy::computeMinBiasTGS(flags, maxBias);
|
||
|
|
const PxReal bias = PxClamp(unclampedBias, minBias, maxBias);
|
||
|
|
|
||
|
|
const PxReal constant = isSpringConstraint ? (bias + targetVel) : recipResponse * (bias + targetVel);
|
||
|
|
|
||
|
|
const float maxImpulse = ccon.maxImpulse[threadIndex];//FLoad(c.maxImpulse);
|
||
|
|
const float minImpulse = ccon.minImpulse[threadIndex];//FLoad(c.minImpulse);
|
||
|
|
|
||
|
|
const float v0 = linVel0.dot(clinVel0) + angVel0.dot(cangVel0);//V3MulAdd(linVel0, clinVel0, V3Mul(angVel0, cangVel0));
|
||
|
|
const float v1 = linVel1.dot(clinVel1) + angVel1.dot(cangVel1);//V3MulAdd(linVel1, clinVel1, V3Mul(angVel1, cangVel1));
|
||
|
|
|
||
|
|
const float normalVel = v0 - v1;
|
||
|
|
|
||
|
|
const float unclampedForce = appliedForce + (vMul * normalVel + constant);//FMulAdd(iMul, appliedForce, FMulAdd(vMul, normalVel, constant));
|
||
|
|
const float clampedForce = fminf(maxImpulse, fmaxf(minImpulse, unclampedForce));//FMin(maxImpulse, (FMax(minImpulse, unclampedForce)));
|
||
|
|
const float deltaF = clampedForce - appliedForce;//FSub(clampedForce, appliedForce);
|
||
|
|
|
||
|
|
ccon.appliedForce[threadIndex] = clampedForce; // FStore(clampedForce);
|
||
|
|
if(residualReportingEnabled)
|
||
|
|
ccon.residual[threadIndex] = PxgErrorAccumulator::calculateResidual(deltaF, vMul);
|
||
|
|
|
||
|
|
li0 = clinVel0 * deltaF + li0;
|
||
|
|
ai0 = cangVel0 * deltaF + ai0;
|
||
|
|
li1 = clinVel1 * deltaF + li1;
|
||
|
|
ai1 = cangVel1 * deltaF + ai1;
|
||
|
|
|
||
|
|
PxVec3 linVa = ref0 * PxVec3(response.deltaRALin_x[threadIndex], response.deltaRALin_y[threadIndex], response.deltaRALin_z[threadIndex]);
|
||
|
|
PxVec3 angVa = ref0 * PxVec3(response.deltaRAAng_x[threadIndex], response.deltaRAAng_y[threadIndex], response.deltaRAAng_z[threadIndex]);
|
||
|
|
PxVec3 linVb = ref1 * PxVec3(response.deltaRBLin_x[threadIndex], response.deltaRBLin_y[threadIndex], response.deltaRBLin_z[threadIndex]);
|
||
|
|
PxVec3 angVb = ref1 * PxVec3(response.deltaRBAng_x[threadIndex], response.deltaRBAng_y[threadIndex], response.deltaRBAng_z[threadIndex]);
|
||
|
|
|
||
|
|
linVel0 = linVa * deltaF + linVel0;
|
||
|
|
angVel0 = angVa * deltaF + angVel0;
|
||
|
|
|
||
|
|
linVel1 = linVb * deltaF + linVel1;
|
||
|
|
angVel1 = angVb * deltaF + angVel1;
|
||
|
|
}
|
||
|
|
|
||
|
|
vel0.top = angVel0; vel0.bottom = linVel0;
|
||
|
|
vel1.top = angVel1; vel1.bottom = linVel1;
|
||
|
|
|
||
|
|
impluse0.top = li0 * invMass0; impluse0.bottom = ai0 * invInertiaScale0;
|
||
|
|
impluse1.top = li1 * invMass1; impluse1.bottom = ai1 * invInertiaScale1;
|
||
|
|
}
|
||
|
|
|
||
|
|
static __device__ void conclude1DBlockTGS(const PxgBlockConstraintBatch& batch, const PxU32 threadIndex, const PxgTGSBlockSolverConstraint1DHeader* PX_RESTRICT headers, PxgTGSBlockSolverConstraint1DCon* PX_RESTRICT rows)
|
||
|
|
{
|
||
|
|
using namespace physx;
|
||
|
|
|
||
|
|
const PxgTGSBlockSolverConstraint1DHeader* PX_RESTRICT header = &headers[batch.mConstraintBatchIndex];
|
||
|
|
PxgTGSBlockSolverConstraint1DCon* PX_RESTRICT base = &rows[batch.startConstraintIndex];
|
||
|
|
|
||
|
|
const uchar4 rowCount = header->rowCounts_breakable_orthoAxisCount[threadIndex];
|
||
|
|
|
||
|
|
for (PxU32 i = 0; i<rowCount.x; i++)
|
||
|
|
{
|
||
|
|
PxgTGSBlockSolverConstraint1DCon& c = base[i];
|
||
|
|
|
||
|
|
// Previous data, biasScale, initBias, velTarget, and velMultiplier are repacked using other coefficients.
|
||
|
|
// See "PxgTGSBlockSolverConstraint1DCon" for details.
|
||
|
|
|
||
|
|
if(!(c.flags[threadIndex] & DY_SC_FLAG_KEEP_BIAS))
|
||
|
|
{
|
||
|
|
c.lin1XYZ_biasScaleOrCoeff1[threadIndex].w = 0.f; // setting biasScale to zero. Make sure to enforce
|
||
|
|
// initialBias to be zero, when biasScale is zero.
|
||
|
|
}
|
||
|
|
if(c.flags[threadIndex] & DY_SC_FLAG_SPRING)
|
||
|
|
{
|
||
|
|
// see CPU version for an explanation
|
||
|
|
c.lin0XYZ_initBiasOrCoeff0[threadIndex].w = 0.f;
|
||
|
|
c.lin1XYZ_biasScaleOrCoeff1[threadIndex].w = 0.f;
|
||
|
|
|
||
|
|
c.ang0XYZ_velMultiplierOrCoeff2[threadIndex].w = 0.0f;
|
||
|
|
c.ang1XYZ_velTargetOrCoeff3[threadIndex].w = 0.0f;
|
||
|
|
}
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
|
||
|
|
static __device__ void writeBack1DBlockTGS(const PxgBlockConstraintBatch& batch, const PxU32 threadIndex, const PxgTGSBlockSolverConstraint1DHeader* PX_RESTRICT headers,
|
||
|
|
PxgTGSBlockSolverConstraint1DCon* PX_RESTRICT rowsCon, PxgConstraintWriteback* constraintWriteBacks)
|
||
|
|
{
|
||
|
|
const PxgTGSBlockSolverConstraint1DHeader* PX_RESTRICT header = &headers[batch.mConstraintBatchIndex];
|
||
|
|
PxgTGSBlockSolverConstraint1DCon* conBase = &rowsCon[batch.startConstraintIndex];
|
||
|
|
|
||
|
|
PxU32 forceWritebackOffset = header->writeBackOffset[threadIndex];
|
||
|
|
|
||
|
|
const uchar4 rowCounts_breakable_orthoAxisCount = header->rowCounts_breakable_orthoAxisCount[threadIndex];
|
||
|
|
|
||
|
|
const PxU8 breakable = rowCounts_breakable_orthoAxisCount.y;
|
||
|
|
|
||
|
|
const PxU32 numRows = rowCounts_breakable_orthoAxisCount.x;
|
||
|
|
|
||
|
|
if (forceWritebackOffset != 0xFFFFFFFF)
|
||
|
|
{
|
||
|
|
PxgConstraintWriteback& writeback = constraintWriteBacks[forceWritebackOffset];
|
||
|
|
|
||
|
|
PxVec3 linVel(0), angVel(0);
|
||
|
|
PxReal constraintErrorSq = 0.0f;
|
||
|
|
for (PxU32 i = 0; i < numRows; ++i)
|
||
|
|
{
|
||
|
|
PxgTGSBlockSolverConstraint1DCon& con = conBase[i];
|
||
|
|
|
||
|
|
if (con.flags[threadIndex] & DY_SC_FLAG_OUTPUT_FORCE)
|
||
|
|
{
|
||
|
|
const float4 lin0XYZ_ErrorW = con.lin0XYZ_initBiasOrCoeff0[threadIndex];
|
||
|
|
const float4 ang0WriteBack_VMW = con.ang0XYZ_velMultiplierOrCoeff2[threadIndex];
|
||
|
|
|
||
|
|
const PxVec3 lin0(lin0XYZ_ErrorW.x, lin0XYZ_ErrorW.y, lin0XYZ_ErrorW.z);
|
||
|
|
const PxVec3 ang0WriteBack(ang0WriteBack_VMW.x, ang0WriteBack_VMW.y, ang0WriteBack_VMW.z);
|
||
|
|
const PxReal appliedForce = con.appliedForce[threadIndex];
|
||
|
|
linVel += lin0 * appliedForce;
|
||
|
|
angVel += ang0WriteBack *appliedForce;
|
||
|
|
}
|
||
|
|
|
||
|
|
PxReal err = con.residual[threadIndex];
|
||
|
|
constraintErrorSq += err * err;
|
||
|
|
}
|
||
|
|
|
||
|
|
const float4 body0WorldOffset_InvMass0D0 = header->rAWorld_invMass0D0[threadIndex];
|
||
|
|
const PxVec3 body0WorldOffset(body0WorldOffset_InvMass0D0.x, body0WorldOffset_InvMass0D0.y, body0WorldOffset_InvMass0D0.z);
|
||
|
|
angVel -= body0WorldOffset.cross(linVel);
|
||
|
|
|
||
|
|
|
||
|
|
const PxU32 broken = breakable ? PxU32((linVel.magnitude() > header->linBreakImpulse[threadIndex]) || (angVel.magnitude() > header->angBreakImpulse[threadIndex])) : 0;
|
||
|
|
writeback.angularImpulse_residual = make_float4(angVel.x, angVel.y, angVel.z, constraintErrorSq);
|
||
|
|
writeback.linearImpulse_broken = make_float4(linVel.x, linVel.y, linVel.z, broken ? -0.0f : 0.0f);
|
||
|
|
}
|
||
|
|
|
||
|
|
}
|
||
|
|
|
||
|
|
|
||
|
|
|
||
|
|
|
||
|
|
#endif
|