feat(physics): wire physx sdk into build
This commit is contained in:
203
engine/third_party/physx/source/gpuarticulation/include/PxgArticulationCore.h
vendored
Normal file
203
engine/third_party/physx/source/gpuarticulation/include/PxgArticulationCore.h
vendored
Normal file
@@ -0,0 +1,203 @@
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions
|
||||
// are met:
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
// * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
// contributors may be used to endorse or promote products derived
|
||||
// from this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
|
||||
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
|
||||
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
|
||||
|
||||
#ifndef PXG_ARTICULATION_CORE_H
|
||||
#define PXG_ARTICULATION_CORE_H
|
||||
|
||||
#include "PxDirectGPUAPI.h"
|
||||
#include "foundation/PxPreprocessor.h"
|
||||
#include "foundation/PxSimpleTypes.h"
|
||||
#include "foundation/PxPinnedArray.h"
|
||||
#include "foundation/PxUserAllocated.h"
|
||||
#include "PxgCudaBuffer.h"
|
||||
#if !PX_CUDA_COMPILER
|
||||
#include <vector_types.h>
|
||||
#endif
|
||||
|
||||
#include "DyFeatherstoneArticulation.h"
|
||||
#include "PxgArticulation.h"
|
||||
|
||||
namespace physx
|
||||
{
|
||||
//this is needed to force PhysXArticulationGpu linkage as Static Library!
|
||||
void createPxgArticulation();
|
||||
|
||||
class PxgCudaKernelWranglerManager;
|
||||
class PxCudaContextManager;
|
||||
class PxCudaContext;
|
||||
class PxgHeapMemoryAllocatorManager;
|
||||
|
||||
class PxgGpuContext;
|
||||
|
||||
struct PxgSolverReferences;
|
||||
struct PxgSolverBodySleepData;
|
||||
|
||||
struct PxgArticulationCoreDesc;
|
||||
struct PxgArticulationOutputDesc;
|
||||
|
||||
struct PxIndexDataPair;
|
||||
class PxSceneDesc;
|
||||
|
||||
class PxgArticulationCore : public PxUserAllocated
|
||||
{
|
||||
public:
|
||||
PxgArticulationCore(PxgCudaKernelWranglerManager* gpuKernelWrangler, PxCudaContextManager* cudaContextManager, PxgHeapMemoryAllocatorManager* heapMemoryManager);
|
||||
~PxgArticulationCore();
|
||||
|
||||
void gpuMemDmaUpArticulationDesc(const PxU32 offset, const PxU32 nbArticulations, PxReal dt, const PxVec3& gravity, const PxReal invLengthScale, const bool isExternalForcesEveryTgsIterationEnabled);
|
||||
|
||||
void createStaticContactAndConstraintsBatch(const PxU32 nbArticulations);
|
||||
|
||||
PxU32 computeUnconstrainedVelocities(const PxU32 offset, const PxU32 nbArticulations, PxReal dt, const PxVec3& gravity, const PxReal invLengthScale, const bool isExternalForcesEveryTgsIterationEnabled, bool recomputeBlockFormat);
|
||||
PxU32 setupInternalConstraints(const PxU32 nbArticulations, const PxReal stepDt, const PxReal dt, const PxReal invDt, const bool isTGSSolver);
|
||||
void syncStream();
|
||||
|
||||
void precomputeDependencies(const PxU32 nbPartitions);
|
||||
|
||||
void syncUnconstrainedVelocities();
|
||||
|
||||
void propagateRigidBodyImpulsesAndSolveInternalConstraints(const PxReal dt, const PxReal invDt, const bool velocityIteration, const PxReal elapsedTime,
|
||||
const PxReal biasCoefficient, PxU32* staticContactUniqueIndices, PxU32* staticJointUniqueIndices,
|
||||
CUdeviceptr sharedDesc, bool doFriction, bool isTGS, bool residualReportingEnabled, bool isExternalForcesEveryTgsIterationEnabled = false);
|
||||
|
||||
//These two methods are for articulation vs soft body interaction
|
||||
void outputVelocity(CUdeviceptr sharedDesc, CUstream solverStream, bool isTGS);
|
||||
void pushImpulse(CUstream solverStream);
|
||||
|
||||
void stepArticulation(const PxReal stepDt);
|
||||
void averageDeltaV(const PxU32 nbSlabs, CUstream stream, float4* velocities, const PxU32 partitionId,
|
||||
bool isTGS, CUdeviceptr sharedDescd);
|
||||
void applyTgsSubstepForces(PxReal stepDt, CUstream stream);
|
||||
void saveVelocities();
|
||||
void updateBodies(PxReal dt, bool integrate, bool enableDirectGPUAPI);
|
||||
void gpuMemDMAbackArticulation(PxInt8ArrayPinned& linkAndJointAndRootStateData,
|
||||
PxPinnedArray<PxgSolverBodySleepData>& wakeCounterPool, PxPinnedArray<Dy::ErrorAccumulator>& internalResidualPerArticulation, PxPinnedArray<Dy::ErrorAccumulator>& contactResidual);
|
||||
|
||||
void setSolverStream(CUstream& solverStream) { mSolverStream = &solverStream; }
|
||||
|
||||
void setGpuContext(PxgGpuContext* context) { mGpuContext = context; }
|
||||
|
||||
PxgArticulationCoreDesc* getArticulationCoreDesc() { return mArticulationCoreDesc; }
|
||||
|
||||
CUdeviceptr getArticulationCoreDescd() { return mArticulationCoreDescd.getDevicePtr(); }
|
||||
|
||||
CUdeviceptr getDeferredZ() { return mDeltaVs.getDevicePtr(); }
|
||||
|
||||
CUdeviceptr getArticulationDirty() { return mSlabHasChanges.getDevicePtr(); }
|
||||
|
||||
CUdeviceptr getArticulationSlabMask() { return mSlabDirtyMasks.getDevicePtr(); }
|
||||
|
||||
PxU32 getNbActiveArticulations() const { return mNbActiveArticulation; }
|
||||
|
||||
CUstream getStream() { return mStream; }
|
||||
|
||||
CUevent getFlushArticulationDataEvent() { return mFlushArticulationDataEvent; }
|
||||
|
||||
void synchronizedStreams(CUstream bpStream, CUstream npStream);
|
||||
|
||||
bool getArticulationData(void* data, const PxArticulationGPUIndex* gpuIndices, PxArticulationGPUAPIReadType::Enum dataType, PxU32 nbElements, CUevent startEvent, CUevent finishEvent, PxU32 maxLinks, PxU32 maxDofs, PxU32 maxFixedTendons, PxU32 maxTendonJoints, PxU32 maxSpatialTendons, PxU32 maxSpatialAttachments) const;
|
||||
bool setArticulationData(const void* data, const PxArticulationGPUIndex* gpuIndices, PxArticulationGPUAPIWriteType::Enum dataType, PxU32 nbElements, CUevent startEvent, CUevent finishEvent, PxU32 maxLinks, PxU32 maxDofs, PxU32 maxFixedTendons, PxU32 maxTendonJoints, PxU32 maxSpatialTendons, PxU32 maxSpatialAttachments);
|
||||
bool computeArticulationData(void* data, const PxArticulationGPUIndex* gpuIndices, PxArticulationGPUAPIComputeType::Enum operation, PxU32 nbElements,
|
||||
PxU32 maxLinks, PxU32 maxDofs, CUevent startEvent, CUevent finishEvent);
|
||||
|
||||
// needed if root transforms or joint positions are updated using direct-GPU API. needs to be called before computeUnconstrainedVelocities.
|
||||
void updateArticulationsKinematic(bool zeroSimOutput, const PxArticulationGPUIndex* PX_RESTRICT gpuIndices=NULL, PxU32 nbElements=0);
|
||||
|
||||
void allocDeltaVBuffer(PxU32 nbSlabs, PxU32 nbPartitions, CUstream stream);
|
||||
|
||||
void layoutDeltaVBuffer(const PxU32 nbSlabs, const PxU32 nbPartitions, CUstream stream);
|
||||
|
||||
private:
|
||||
// new Direct-GPU API methods
|
||||
bool getDofStates(void* data, const PxArticulationGPUIndex* gpuIndices, PxU32 nbElements, PxU32 maxDofs, PxArticulationGPUAPIReadType::Enum dataType) const;
|
||||
bool getTransformStates(void* data, const PxArticulationGPUIndex* gpuIndices, PxU32 nbElements, PxU32 maxLinks, PxArticulationGPUAPIReadType::Enum dataType) const;
|
||||
bool getLinkVelocityStates(void* data, const PxArticulationGPUIndex* gpuIndices, PxU32 nbElements, PxU32 maxLinks, PxArticulationGPUAPIReadType::Enum dataType) const;
|
||||
bool getLinkSpatialForceStates(void* data, const PxArticulationGPUIndex* gpuIndices, PxU32 nbElements, PxU32 maxLinks, PxArticulationGPUAPIReadType::Enum dataType) const;
|
||||
|
||||
bool setDofStates(const void* data, const PxArticulationGPUIndex* gpuIndices, PxU32 nbElements, PxU32 maxDofs, PxArticulationGPUAPIWriteType::Enum dataType);
|
||||
bool setRootGlobalPoseStates(const void* data, const PxArticulationGPUIndex* gpuIndices, PxU32 nbElements);
|
||||
bool setRootVelocityStates(const void* data, const PxArticulationGPUIndex* gpuIndices, PxU32 nbElements, PxArticulationGPUAPIWriteType::Enum dataType);
|
||||
bool setLinkForceStates(const void* data, const PxArticulationGPUIndex* gpuIndices, PxU32 nbElements, PxU32 maxLinks);
|
||||
bool setLinkTorqueStates(const void* data, const PxArticulationGPUIndex* gpuIndices, PxU32 nbElements, PxU32 maxLinks);
|
||||
bool setTendonStates(const void* data, const PxArticulationGPUIndex* gpuIndices, PxU32 nbElements, PxU32 maxTendons, PxArticulationGPUAPIWriteType::Enum dataType);
|
||||
bool setSpatialTendonAttachmentStates(const void* data, const PxArticulationGPUIndex* gpuIndices, PxU32 nbElements, PxU32 maxTendonsXmaxAttachments);
|
||||
bool setFixedTendonJointStates(const void* data, const PxArticulationGPUIndex* gpuIndices, PxU32 nbElements, PxU32 maxFixedTendonsXmaxTendonJoints);
|
||||
bool getTendonStates(void* data, const PxArticulationGPUIndex* gpuIndices, PxU32 nbElements, PxU32 maxTendons, PxArticulationGPUAPIReadType::Enum dataType) const;
|
||||
bool getSpatialTendonAttachmentStates(void* data, const PxArticulationGPUIndex* gpuIndices, PxU32 nbElements, PxU32 maxTendonsXmaxAttachments) const;
|
||||
bool getFixedTendonJointStates(void* data, const PxArticulationGPUIndex* gpuIndices, PxU32 nbElements, PxU32 maxFixedTendonsXmaxTendonJoints) const;
|
||||
|
||||
PxgArticulationCoreDesc* mArticulationCoreDesc;
|
||||
PxgArticulationOutputDesc* mArticulationOutputDesc;
|
||||
|
||||
PxgCudaKernelWranglerManager* mGpuKernelWranglerManager;
|
||||
PxCudaContextManager* mCudaContextManager;
|
||||
PxCudaContext* mCudaContext;
|
||||
CUstream mStream;
|
||||
CUstream* mSolverStream;
|
||||
CUevent mFinishEvent;
|
||||
CUevent mFlushArticulationDataEvent;
|
||||
|
||||
PxgGpuContext* mGpuContext;
|
||||
|
||||
PxgTypedCudaBuffer<PxgArticulationCoreDesc> mArticulationCoreDescd;
|
||||
PxgTypedCudaBuffer<PxgArticulationOutputDesc> mArticulationOutputDescd;
|
||||
PxU32 mNbActiveArticulation;
|
||||
|
||||
PxgTypedCudaBuffer<Cm::UnAlignedSpatialVector> mDeltaVs;
|
||||
PxgTypedCudaBuffer<uint2> mSlabHasChanges;
|
||||
PxgTypedCudaBuffer<uint4> mSlabDirtyMasks;
|
||||
|
||||
PxgTypedCudaBuffer<PxgArticulationBitFieldStackData> mPathToRootPerPartition;
|
||||
PxgTypedCudaBuffer<PxU32> mDirtyLinksPerPartition;
|
||||
PxgTypedCudaBuffer<PxReal> mImpulseScalePerPartition;
|
||||
|
||||
PxgTypedCudaBuffer<PxU32> mTempContactUniqueIndicesBlockBuffer;
|
||||
PxgTypedCudaBuffer<PxU32> mTempConstraintUniqueIndicesBlockBuffer;
|
||||
PxgTypedCudaBuffer<PxU32> mTempContactHeaderBlockBuffer;
|
||||
PxgTypedCudaBuffer<PxU32> mTempConstraintHeaderBlockBuffer;
|
||||
|
||||
PxgTypedCudaBuffer<PxU32> mTempSelfContactUniqueIndicesBlockBuffer;
|
||||
PxgTypedCudaBuffer<PxU32> mTempSelfConstraintUniqueIndicesBlockBuffer;
|
||||
PxgTypedCudaBuffer<PxU32> mTempSelfContactHeaderBlockBuffer;
|
||||
PxgTypedCudaBuffer<PxU32> mTempSelfConstraintHeaderBlockBuffer;
|
||||
|
||||
CUevent mComputeUnconstrainedEvent;
|
||||
|
||||
bool mNeedsKinematicUpdate;
|
||||
|
||||
#if PX_SUPPORT_OMNI_PVD
|
||||
void ovdArticulationCallback(const void* PX_RESTRICT data, const PxRigidDynamicGPUIndex* PX_RESTRICT gpuIndices,
|
||||
PxArticulationGPUAPIWriteType::Enum dataType, PxU32 nbElements,
|
||||
PxU32 maxLinks, PxU32 maxDofs, PxU32 maxFixedTendons, PxU32 maxTendonJoints, PxU32 maxSpatialTendons, PxU32 maxSpatialTendonAttachments);
|
||||
PxPinnedArray<PxU8> mOvdDataBuffer;
|
||||
PxPinnedArray<PxU8> mOvdIndexBuffer;
|
||||
#endif
|
||||
};
|
||||
}
|
||||
|
||||
#endif
|
||||
145
engine/third_party/physx/source/gpuarticulation/include/PxgArticulationCoreDesc.h
vendored
Normal file
145
engine/third_party/physx/source/gpuarticulation/include/PxgArticulationCoreDesc.h
vendored
Normal file
@@ -0,0 +1,145 @@
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions
|
||||
// are met:
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
// * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
// contributors may be used to endorse or promote products derived
|
||||
// from this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
|
||||
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
|
||||
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
|
||||
|
||||
#ifndef PXG_ARTICULATION_CORE_DESC_H
|
||||
#define PXG_ARTICULATION_CORE_DESC_H
|
||||
|
||||
#include "foundation/PxSimpleTypes.h"
|
||||
#include "foundation/PxVec3.h"
|
||||
#include "PxgArticulation.h"
|
||||
|
||||
namespace physx
|
||||
{
|
||||
struct PxgBodySim;
|
||||
struct PxgSolverBodySleepData;
|
||||
|
||||
class PxGpuTendonAttachmentData;
|
||||
class PxGpuTendonJointCoefficientData;
|
||||
|
||||
namespace IG
|
||||
{
|
||||
class NodeIndex;
|
||||
};
|
||||
|
||||
namespace Cm
|
||||
{
|
||||
struct UnAlignedSpatialVector;
|
||||
}
|
||||
|
||||
struct PxgArticulationCoreDesc
|
||||
{
|
||||
PxgBodySim* mBodySimBufferDeviceData;
|
||||
PxgSolverBodySleepData* articulationSleepData; //sleep data for the articulation
|
||||
PxNodeIndex* islandNodeIndices;
|
||||
PxU32* solverBodyIndices;
|
||||
|
||||
PxgArticulation* articulations;
|
||||
PxU32 articulationOffset;//offset to the islandNodeIndices
|
||||
PxU32 nbArticulations;
|
||||
PxReal dt;
|
||||
PxVec3 gravity;
|
||||
PxReal invLengthScale;
|
||||
bool isExternalForcesEveryTgsIterationEnabled;
|
||||
|
||||
Cm::UnAlignedSpatialVector* impulses;
|
||||
PxU32 nbSlabs;
|
||||
PxU32 nbPartitions;
|
||||
uint2* slabHasChanges; // one uint2 per articulation per slab. Stores up to two link indices to communicate which links holds an impulse from the solver.
|
||||
uint4* slabDirtyMasks; // one uint4 [linkIndexA, writeIndexA, linkIndexB, writeIndexB] per articulation per partition per slab, index = articulationId + slab * nbArticulations + partitionId * nbArticulations * nbSlabs. Used for internal velocity propagation (artiPropagateVelocityInternal).
|
||||
|
||||
//The dirty paths we need to process for each partition. This defines
|
||||
//how we propagate impulses in the articulations. There is one of these
|
||||
//per partition, per articulation
|
||||
PxgArticulationBitFieldStackData* mPathToRootsPerPartition;
|
||||
//This defines which link is holding the current accumulated impulse. There is
|
||||
//one entry per articulation per partition
|
||||
PxU32* mImpulseHoldingLink;
|
||||
PxReal* mPartitionAverageScale; // One float per partition per articulation. Stores 1/numSlabsInThisPartitionInvolvingThisArticulation, see artiComputeDependencies
|
||||
|
||||
PxgArticulationBlockData* mArticulationBlocks;
|
||||
PxgArticulationBlockLinkData* mArticulationLinkBlocks;
|
||||
PxgArticulationTraversalStackData* mArticulationTraversalStackBlocks;
|
||||
PxgArticulationBitFieldStackData* mTempPathToRootBitFieldBlocks;
|
||||
PxgArticulationBitFieldStackData* mTempSharedBitFieldBlocks;
|
||||
PxgArticulationBitFieldStackData* mTempRootBitFieldBlocks;
|
||||
|
||||
//A quick reminder of the indexing of mPathToRootBitFieldBlocks.
|
||||
//A bitfield is just a PxU64. It can describe the path to root for a single link in an articulation with 64 links.
|
||||
//We support more than 64 links so we need a bitfield array to describe the path to root for a single link.
|
||||
//For each link we need bitfield[maxWordCount] with maxWordCount = (maxLinks+63)/64 to describe the path to the root.
|
||||
//For each articulation we need bitField[maxLinks*maxWordCount] to describe the path to root for all links.
|
||||
//The array of bitfields for an articulation with globalWarpIndex will begin at mPathToRootBitFieldBlocks[globalWarpIndex*(maxLinks*maxWordCount))
|
||||
PxgArticulationBitFieldData* mPathToRootBitFieldBlocks;
|
||||
|
||||
PxgArticulationBlockDofData* mArticulationDofBlocks;
|
||||
|
||||
PxgArticulationBlockSpatialTendonData* mArticulationSpatialTendonBlocks;
|
||||
PxgArticulationInternalTendonConstraintData* mArticulationSpatialTendonConstraintBlocks;
|
||||
PxgArticulationBlockAttachmentData* mArticulationAttachmentBlocks;
|
||||
|
||||
PxgArticulationBlockFixedTendonData* mArticulationFixedTendonBlocks;
|
||||
PxgArticulationInternalTendonConstraintData* mArticulationFixedTendonConstraintBlocks;
|
||||
PxgArticulationBlockTendonJointData* mArticulationTendonJointBlocks;
|
||||
|
||||
PxgArticulationBlockMimicJointData* mArticulationMimicJointBlocks;
|
||||
|
||||
PxU32 mMaxLinksPerArticulation;
|
||||
PxU32 mMaxDofsPerArticulation;
|
||||
PxU32 mMaxMimicJointsPerArticulation;
|
||||
PxU32 mMaxSpatialTendonsPerArticulation;
|
||||
PxU32 mMaxAttachmentPerArticulation;
|
||||
|
||||
PxU32 mMaxFixedTendonsPerArticulation;
|
||||
PxU32 mMaxTendonJointPerArticulation;
|
||||
|
||||
PxU32* mTempContactUniqueIndicesBlock;
|
||||
PxU32* mTempConstraintUniqueIndicesBlock;
|
||||
PxU32* mTempContactHeaderBlock;
|
||||
PxU32* mTempConstraintHeaderBlock;
|
||||
|
||||
PxU32* mTempSelfContactUniqueIndicesBlock;
|
||||
PxU32* mTempSelfConstraintUniqueIndicesBlock;
|
||||
PxU32* mTempSelfContactHeaderBlock;
|
||||
PxU32* mTempSelfConstraintHeaderBlock;
|
||||
|
||||
Dy::ErrorAccumulator mContactErrorAccumulator;
|
||||
};
|
||||
|
||||
struct PxgArticulationOutputDesc
|
||||
{
|
||||
public:
|
||||
//see PxgArticulationLinkJointRootStateData
|
||||
PxU8* linkAndJointAndRootStateData;
|
||||
//PxReal* jointPosition_Vel_Accel;
|
||||
PxgSolverBodySleepData* sleepData;
|
||||
Dy::ErrorAccumulator* errorAccumulator; //Per articulation, collects internal residuals (no contacts or external PxJoints connected to the articulation)
|
||||
Dy::ErrorAccumulator* contactResidualAccumulator; //Only one value accumulating contact residuals over all articulations
|
||||
};
|
||||
}
|
||||
|
||||
#endif
|
||||
76
engine/third_party/physx/source/gpuarticulation/include/PxgArticulationCoreKernelIndices.h
vendored
Normal file
76
engine/third_party/physx/source/gpuarticulation/include/PxgArticulationCoreKernelIndices.h
vendored
Normal file
@@ -0,0 +1,76 @@
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions
|
||||
// are met:
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
// * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
// contributors may be used to endorse or promote products derived
|
||||
// from this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
|
||||
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
|
||||
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
|
||||
|
||||
#ifndef PXG_ARTICULATION_CORE_KERNEL_INDICES_H
|
||||
#define PXG_ARTICULATION_CORE_KERNEL_INDICES_H
|
||||
|
||||
namespace physx
|
||||
{
|
||||
|
||||
struct PxgArticulationCoreKernelBlockDim
|
||||
{
|
||||
enum
|
||||
{
|
||||
COMPUTE_UNCONSTRAINED_VELOCITES = 64,
|
||||
UPDATE_BODIES = 128,
|
||||
SOLVE_INTERNAL_CONSTRAINTS = 32,
|
||||
COMPUTE_UNCONSTRAINED_SPATIAL_INERTIA = 32,
|
||||
COMPUTE_UNCONSTRAINED_SPATIAL_INERTIA_PARTIAL = 64,
|
||||
COMPUTE_STATIC_CONTACT_CONSTRAINT_COUNT = 512,
|
||||
APPLY_ARTI_STATE = 512,
|
||||
ARTI_GET_DOF_STATES = 512,
|
||||
ARTI_GET_TRANSFORM_STATES = 512,
|
||||
ARTI_GET_VELOCITY_STATES = 480,
|
||||
ARTI_GET_SPATIAL_FORCE_STATES = 512,
|
||||
ARTI_GET_TENDON_STATE = 512,
|
||||
ARTI_GET_SPATIAL_TENDON_ATTACHMENT_STATE = 512,
|
||||
ARTI_GET_FIXED_TENDON_JOINT_STATE = 512,
|
||||
ARTI_SET_DOF_STATES = 512,
|
||||
ARTI_SET_ROOT_GLOBAL_POSE_STATE = 512,
|
||||
ARTI_SET_ROOT_VELOCITY_STATE = 480,
|
||||
ARTI_SET_LINK_FORCE_STATE = 480,
|
||||
ARTI_SET_LINK_TORQUE_STATE = 512,
|
||||
ARTI_SET_TENDON_STATE = 512,
|
||||
ARTI_SET_SPATIAL_TENDON_ATTACHMENT_STATE = 512,
|
||||
ARTI_SET_FIXED_TENDON_JOINT_STATE = 512
|
||||
};
|
||||
};
|
||||
|
||||
struct PxgArticulationCoreKernelGridDim
|
||||
{
|
||||
enum
|
||||
{
|
||||
COMPUTE_UNCONSTRAINED_VELOCITES = 64,
|
||||
UPDATE_BODIES = 64,
|
||||
UPDATE_KINEMATIC = 1024 // AD: see inline comment in PxgArticulationCore
|
||||
};
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
1110
engine/third_party/physx/source/gpuarticulation/src/CUDA/articulationDirectGpuApi.cu
vendored
Normal file
1110
engine/third_party/physx/source/gpuarticulation/src/CUDA/articulationDirectGpuApi.cu
vendored
Normal file
File diff suppressed because it is too large
Load Diff
449
engine/third_party/physx/source/gpuarticulation/src/CUDA/articulationDynamic.cuh
vendored
Normal file
449
engine/third_party/physx/source/gpuarticulation/src/CUDA/articulationDynamic.cuh
vendored
Normal file
@@ -0,0 +1,449 @@
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions
|
||||
// are met:
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
// * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
// contributors may be used to endorse or promote products derived
|
||||
// from this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
|
||||
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
|
||||
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
|
||||
|
||||
#ifndef __ARTI_DYNAMIC_CUH__
|
||||
#define __ARTI_DYNAMIC_CUH__
|
||||
|
||||
#include "PxgArticulation.h"
|
||||
#include "PxgArticulationLink.h"
|
||||
#include "DyFeatherstoneArticulationUtils.h"
|
||||
#include "DyFeatherstoneArticulation.h"
|
||||
|
||||
//This function stores Q-stZ to mDeferredQstZ
|
||||
static __device__ Cm::UnAlignedSpatialVector propagateImpulseW_0(const PxVec3& childToParent,
|
||||
PxgArticulationBlockDofData* PX_RESTRICT dofData, const Cm::UnAlignedSpatialVector& Z,
|
||||
const PxU32 dofCount, const PxU32 threadIndexInWarp,
|
||||
const PxReal* PX_RESTRICT jointForce = NULL, const PxReal jointForceMultiplier = 1.0f)
|
||||
{
|
||||
Cm::UnAlignedSpatialVector temp = Z;
|
||||
Cm::UnAlignedSpatialVector sas[3];
|
||||
Cm::UnAlignedSpatialVector isInvD[3];
|
||||
PxReal jf[3];
|
||||
|
||||
// the split into two separate loops is an optimization that allows dispatching the loads as early as possible.
|
||||
#pragma unroll 3
|
||||
for (PxU32 ind = 0; ind < 3; ++ind)
|
||||
{
|
||||
if (ind < dofCount)
|
||||
{
|
||||
sas[ind] = loadSpatialVector(dofData[ind].mWorldMotionMatrix, threadIndexInWarp);
|
||||
isInvD[ind] = loadSpatialVector(dofData[ind].mIsInvDW, threadIndexInWarp);
|
||||
jf[ind] = (jointForce ? jointForce[ind] * jointForceMultiplier : 0.0f);
|
||||
}
|
||||
}
|
||||
|
||||
#pragma unroll 3
|
||||
for (PxU32 ind = 0; ind < 3; ++ind)
|
||||
{
|
||||
if (ind < dofCount)
|
||||
{
|
||||
const PxReal stZ = jf[ind] - sas[ind].innerProduct(Z);
|
||||
dofData[ind].mDeferredQstZ[threadIndexInWarp] += stZ;
|
||||
temp += isInvD[ind] * stZ;
|
||||
}
|
||||
}
|
||||
|
||||
//parent space's spatial zero acceleration impulse
|
||||
return Dy::FeatherstoneArticulation::translateSpatialVector(childToParent, temp);
|
||||
}
|
||||
|
||||
static __device__ Cm::UnAlignedSpatialVector propagateImpulseWTemp(const PxVec3& childToParent,
|
||||
PxgArticulationBlockDofData* PX_RESTRICT dofData, const Cm::UnAlignedSpatialVector& Z,
|
||||
const PxU32 dofCount, const PxU32 threadIndexInWarp)
|
||||
{
|
||||
Cm::UnAlignedSpatialVector temp = Z;
|
||||
|
||||
assert(dofCount<=3);
|
||||
for (PxU32 ind = 0; ind < 3; ++ind)
|
||||
{
|
||||
if(ind<dofCount)
|
||||
{
|
||||
const Cm::UnAlignedSpatialVector sa = loadSpatialVector(dofData[ind].mWorldMotionMatrix, threadIndexInWarp);
|
||||
const Cm::UnAlignedSpatialVector isInvD = loadSpatialVector(dofData[ind].mIsInvDW, threadIndexInWarp);
|
||||
const PxReal stZ = -sa.innerProduct(Z);
|
||||
dofData[ind].mTmpQstZ[threadIndexInWarp] += stZ;
|
||||
|
||||
temp += isInvD * stZ;
|
||||
}
|
||||
}
|
||||
|
||||
//parent space's spatial zero acceleration impulse
|
||||
return Dy::FeatherstoneArticulation::translateSpatialVector(childToParent, temp);
|
||||
}
|
||||
|
||||
static __device__ Cm::UnAlignedSpatialVector propagateImpulseW_1(
|
||||
const PxVec3& childToParent,
|
||||
const PxgArticulationBlockDofData* PX_RESTRICT dofData,
|
||||
const Cm::UnAlignedSpatialVector& Z,
|
||||
const PxReal* jointDofImpulses, const PxU32 dofCount,
|
||||
const PxU32 threadIndexInWarp,
|
||||
PxReal* qstZ)
|
||||
{
|
||||
Cm::UnAlignedSpatialVector temp = Z;
|
||||
|
||||
assert(dofCount<=3);
|
||||
for (PxU32 ind = 0; ind < 3; ++ind)
|
||||
{
|
||||
if(ind<dofCount)
|
||||
{
|
||||
const Cm::UnAlignedSpatialVector sa = loadSpatialVector(dofData[ind].mWorldMotionMatrix, threadIndexInWarp);
|
||||
const Cm::UnAlignedSpatialVector isInvD = loadSpatialVector(dofData[ind].mIsInvDW, threadIndexInWarp);
|
||||
const PxReal jointDofImpulse = jointDofImpulses ? jointDofImpulses[ind] : 0.0f;
|
||||
const PxReal QMinusSTZ = jointDofImpulse - sa.innerProduct(Z);
|
||||
qstZ[ind] += QMinusSTZ;
|
||||
|
||||
temp += isInvD * QMinusSTZ;
|
||||
}
|
||||
}
|
||||
|
||||
//parent space's spatial zero acceleration impulse
|
||||
return Dy::FeatherstoneArticulation::translateSpatialVector(childToParent, temp);
|
||||
}
|
||||
|
||||
static __device__ Cm::UnAlignedSpatialVector propagateAccelerationW(const PxVec3& c2p, const float3* invStIsT,
|
||||
const Cm::UnAlignedSpatialVector* motionMatrix, const Cm::UnAlignedSpatialVector& hDeltaV, const PxU32 dofCount,
|
||||
const Cm::UnAlignedSpatialVector* IsW, const PxReal* qstZ)
|
||||
{
|
||||
Cm::UnAlignedSpatialVector pDeltaV = Dy::FeatherstoneArticulation::translateSpatialVector(-c2p, hDeltaV); //parent velocity change
|
||||
|
||||
//Convert parent velocity change into an impulse
|
||||
PxReal tJointDelta[3] = { 0.f, 0.f, 0.f };
|
||||
#pragma unroll 3
|
||||
for (PxU32 ind = 0; ind < 3; ++ind)
|
||||
{
|
||||
if (ind < dofCount)
|
||||
{
|
||||
//stI * pAcceleration
|
||||
const PxReal temp = IsW[ind].innerProduct(pDeltaV);
|
||||
|
||||
tJointDelta[ind] = (qstZ[ind] - temp);
|
||||
}
|
||||
}
|
||||
|
||||
assert(dofCount<=3);
|
||||
for (PxU32 ind = 0; ind < 3; ++ind)
|
||||
{
|
||||
if(ind<dofCount)
|
||||
{
|
||||
const float3 iStIsTi = invStIsT[ind];
|
||||
|
||||
const PxReal jDelta = iStIsTi.x * tJointDelta[0]
|
||||
+ iStIsTi.y * tJointDelta[1]
|
||||
+ iStIsTi.z * tJointDelta[2];
|
||||
|
||||
pDeltaV += motionMatrix[ind] * jDelta;
|
||||
}
|
||||
}
|
||||
|
||||
return pDeltaV;
|
||||
}
|
||||
|
||||
// Converts per-dof joint-space deltas into a single spatial velocity change.
// dofData  - per-dof block data (motion matrix, inverse S^T*I*S rows) for this joint.
// QSTZMinusISDotTranslatedParentDeltaV - per-dof values of (Q - S^T*Z) - (I*S).dot(parentDeltaV).
// jointDeltaDofSpeeds - optional out array; if non-NULL, receives each dof's speed delta.
// dofCount - number of dofs in this joint (<= 3).
// threadIndexInWarp   - lane index selecting this articulation's slot in the block data.
// Returns sum over dofs of motionAxis[ind] * jDelta[ind].
static __device__ Cm::UnAlignedSpatialVector computeSpatialJointDelta(
	const PxgArticulationBlockDofData* PX_RESTRICT dofData,
	const PxReal* PX_RESTRICT QSTZMinusISDotTranslatedParentDeltaV, PxReal* PX_RESTRICT jointDeltaDofSpeeds, const PxU32 dofCount,
	const PxU32 threadIndexInWarp)
{
	Cm::UnAlignedSpatialVector sas[3];

	// the split into two separate loops is an optimization that allows dispatching the loads as early as possible.
#pragma unroll 3
	for (PxU32 ind = 0; ind < 3; ++ind)
	{
		if (ind < dofCount)
		{
			sas[ind] = loadSpatialVector(dofData[ind].mWorldMotionMatrix, threadIndexInWarp);
		}
	}

	Cm::UnAlignedSpatialVector jointSpatialDeltaV(PxVec3(0.f), PxVec3(0.f));
#pragma unroll 3
	for (PxU32 ind = 0; ind < 3; ++ind)
	{
		if (ind < dofCount)
		{
			// Row ind of the inverse of (S^T * I * S), stored component-wise per lane.
			const float iStIsTi_x = dofData[ind].mInvStIsT_x[threadIndexInWarp];
			const float iStIsTi_y = dofData[ind].mInvStIsT_y[threadIndexInWarp];
			const float iStIsTi_z = dofData[ind].mInvStIsT_z[threadIndexInWarp];

			const PxReal jDelta = iStIsTi_x * QSTZMinusISDotTranslatedParentDeltaV[0]
				+ iStIsTi_y * QSTZMinusISDotTranslatedParentDeltaV[1]
				+ iStIsTi_z * QSTZMinusISDotTranslatedParentDeltaV[2];

			// Optionally report the per-dof speed change to the caller.
			if(jointDeltaDofSpeeds)
				jointDeltaDofSpeeds[ind] = jDelta;

			jointSpatialDeltaV += sas[ind] * jDelta;
		}
	}

	return jointSpatialDeltaV;
}
|
||||
|
||||
//This function use mDeferredQstZ
// Propagates a spatial velocity change across one joint using the deferred
// (Q - S^T*Z) accumulators stored in dofData[ind].mDeferredQstZ.
// c2p                 - child-to-parent offset vector.
// dofData             - per-dof block data for this joint.
// hDeltaV             - incoming spatial velocity change at the child.
// dofCount            - number of joint dofs (<= 3).
// jointDeltaDofSpeeds - optional out array for the per-dof speed deltas.
// Returns the child link's total spatial velocity change.
static __device__ Cm::UnAlignedSpatialVector propagateAccelerationW(const PxVec3& c2p,
	const PxgArticulationBlockDofData* PX_RESTRICT dofData,
	const Cm::UnAlignedSpatialVector& hDeltaV,
	const PxU32 dofCount, PxReal* jointDeltaDofSpeeds, const PxU32 threadIndexInWarp)
{
	const Cm::UnAlignedSpatialVector pDeltaV = Dy::FeatherstoneArticulation::translateSpatialVector(-c2p, hDeltaV); //parent velocity change

	//[(Q - S^T *Z)] - [(I*S).innerProduct(translated(parentDeltaV))]
	PxReal QSTZMinusISDotTranslatedParentDeltaV[3] = { 0.f, 0.f, 0.f };
	Cm::UnAlignedSpatialVector IsW;
#pragma unroll(3)
	for (PxU32 ind = 0; ind < 3; ++ind)
	{
		if (ind < dofCount)
		{
			IsW = loadSpatialVector(dofData[ind].mIsW, threadIndexInWarp);
			//stI * pAcceleration
			const PxReal temp = IsW.innerProduct(pDeltaV);

			QSTZMinusISDotTranslatedParentDeltaV[ind] = (dofData[ind].mDeferredQstZ[threadIndexInWarp] - temp);
		}
	}

	// Convert the joint-space deltas into a spatial velocity change (also reports per-dof speeds if requested).
	const Cm::UnAlignedSpatialVector jointSpatialDeltaV = computeSpatialJointDelta(dofData, QSTZMinusISDotTranslatedParentDeltaV, jointDeltaDofSpeeds, dofCount, threadIndexInWarp);

	return pDeltaV + jointSpatialDeltaV;
}
|
||||
|
||||
//This function use mTmpQstZ
// Propagates a spatial velocity change across one joint using the temporary
// (Q - S^T*Z) accumulators stored in dofData[ind].mTmpQstZ. No per-dof speed
// output is produced (computeSpatialJointDelta is invoked with NULL).
// c2p      - child-to-parent offset vector.
// dofData  - per-dof block data for this joint.
// hDeltaV  - incoming spatial velocity change at the child.
// dofCount - number of joint dofs (<= 3).
// Returns the child link's total spatial velocity change.
static __device__ Cm::UnAlignedSpatialVector propagateAccelerationWTemp(const PxVec3& c2p,
	const PxgArticulationBlockDofData* PX_RESTRICT dofData,
	const Cm::UnAlignedSpatialVector& hDeltaV,
	const PxU32 dofCount, const PxU32 threadIndexInWarp)
{
	// Shift the incoming velocity change into the parent frame.
	const Cm::UnAlignedSpatialVector pDeltaV = Dy::FeatherstoneArticulation::translateSpatialVector(-c2p, hDeltaV); //parent velocity change

	// Per-dof numerator: (Q - S^T*Z) - (I*S).dot(parentDeltaV); unused slots stay zero.
	PxReal qstzMinusIsDotParentDeltaV[3] = { 0.f, 0.f, 0.f };
#pragma unroll(3)
	for (PxU32 dof = 0; dof < 3; ++dof)
	{
		if (dof < dofCount)
		{
			const Cm::UnAlignedSpatialVector isW = loadSpatialVector(dofData[dof].mIsW, threadIndexInWarp);
			qstzMinusIsDotParentDeltaV[dof] = dofData[dof].mTmpQstZ[threadIndexInWarp] - isW.innerProduct(pDeltaV);
		}
	}

	// Fold the joint-space deltas back into a spatial velocity change and combine.
	return pDeltaV + computeSpatialJointDelta(dofData, qstzMinusIsDotParentDeltaV, NULL, dofCount, threadIndexInWarp);
}
|
||||
|
||||
//This function use qstZ as input
// Propagates a spatial velocity change across one joint using caller-supplied
// per-dof (Q - S^T*Z) values rather than the accumulators stored in dofData.
// c2p      - child-to-parent offset vector.
// dofData  - per-dof block data for this joint.
// hDeltaV  - incoming spatial velocity change at the child.
// dofCount - number of joint dofs (<= 3).
// qstZ     - per-dof (Q - S^T*Z) bias terms.
// Returns the child link's total spatial velocity change.
static __device__ Cm::UnAlignedSpatialVector propagateAccelerationW(const PxVec3& c2p,
	const PxgArticulationBlockDofData* PX_RESTRICT dofData,
	const Cm::UnAlignedSpatialVector& hDeltaV,
	const PxU32 dofCount, const PxU32 threadIndexInWarp,
	const PxReal* qstZ)
{
	// Shift the incoming velocity change into the parent frame.
	const Cm::UnAlignedSpatialVector pDeltaV = Dy::FeatherstoneArticulation::translateSpatialVector(-c2p, hDeltaV); //parent velocity change

	// Per-dof numerator: (Q - S^T*Z) - (I*S).dot(parentDeltaV); unused slots stay zero.
	PxReal qstzMinusIsDotParentDeltaV[3] = { 0.f, 0.f, 0.f };
#pragma unroll(3)
	for (PxU32 dof = 0; dof < 3; ++dof)
	{
		if (dof < dofCount)
		{
			const Cm::UnAlignedSpatialVector isW = loadSpatialVector(dofData[dof].mIsW, threadIndexInWarp);
			qstzMinusIsDotParentDeltaV[dof] = qstZ[dof] - isW.innerProduct(pDeltaV);
		}
	}

	// Fold the joint-space deltas back into a spatial velocity change and combine.
	return pDeltaV + computeSpatialJointDelta(dofData, qstzMinusIsDotParentDeltaV, NULL, dofCount, threadIndexInWarp);
}
|
||||
|
||||
// Propagates a spatial velocity change across one joint using the link's stored
// child-to-parent offset (mRw_*), consuming the deferred (Q - S^T*Z) accumulators,
// and ADDITIONALLY writes the updated joint velocities back into
// dofData[ind].mJointVelocities (unlike the other overloads, which are read-only).
// linkData - per-link block data (supplies the child-to-parent offset).
// dofData  - per-dof block data for this joint; mJointVelocities is updated in place.
// dofCount - number of joint dofs (<= 3).
// hDeltaV  - incoming spatial velocity change at the child.
// Returns the child link's total spatial velocity change.
static __device__ Cm::UnAlignedSpatialVector propagateAccelerationW(
	PxgArticulationBlockLinkData& linkData,
	PxgArticulationBlockDofData* dofData,
	const PxU32 dofCount,
	const Cm::UnAlignedSpatialVector& hDeltaV,
	const PxU32 threadIndexInWarp)
{
	// Child-to-parent offset, stored component-wise per lane.
	const float c2px = linkData.mRw_x[threadIndexInWarp];
	const float c2py = linkData.mRw_y[threadIndexInWarp];
	const float c2pz = linkData.mRw_z[threadIndexInWarp];

	float3 invStIsT[3];
	PxReal tJointDelta[3] = { 0.f, 0.f, 0.f };
	Cm::UnAlignedSpatialVector isW[3];
	Cm::UnAlignedSpatialVector mMotionMatrix[3];
	PxReal jVel[3];

	// the split into three separate loops is an optimization that allows dispatching the loads as early as possible.
#pragma unroll 3
	for (PxU32 ind = 0; ind < 3; ++ind)
	{
		if (ind < dofCount)
		{
			isW[ind] = loadSpatialVector(dofData[ind].mIsW, threadIndexInWarp);
			// Seed tJointDelta with the deferred (Q - S^T*Z) accumulator.
			tJointDelta[ind] = (dofData[ind].mDeferredQstZ[threadIndexInWarp]);
			jVel[ind] = dofData[ind].mJointVelocities[threadIndexInWarp];
			invStIsT[ind] = make_float3(dofData[ind].mInvStIsT_x[threadIndexInWarp], dofData[ind].mInvStIsT_y[threadIndexInWarp], dofData[ind].mInvStIsT_z[threadIndexInWarp]);
			mMotionMatrix[ind] = loadSpatialVector(dofData[ind].mWorldMotionMatrix, threadIndexInWarp);
		}
	}

	Cm::UnAlignedSpatialVector pDeltaV = Dy::FeatherstoneArticulation::translateSpatialVector(PxVec3(-c2px, -c2py, -c2pz), hDeltaV); //parent velocity change

	// tJointDelta[ind] = (Q - S^T*Z) - (I*S).dot(parentDeltaV)
#pragma unroll 3
	for (PxU32 ind = 0; ind < 3; ++ind)
	{
		if (ind < dofCount)
		{
			tJointDelta[ind] -= isW[ind].innerProduct(pDeltaV);
		}
	}

#pragma unroll 3
	for (PxU32 ind = 0; ind < 3; ++ind)
	{
		if (ind < dofCount)
		{
			// jDelta = row ind of invStIsT multiplied by the joint-space delta vector.
			const PxReal jDelta = invStIsT[ind].x * tJointDelta[0] + invStIsT[ind].y * tJointDelta[1]
				+ invStIsT[ind].z * tJointDelta[2];

			// Write-back: joint velocity is updated in place with the computed delta.
			dofData[ind].mJointVelocities[threadIndexInWarp] = jVel[ind] + jDelta;

			pDeltaV += mMotionMatrix[ind] * jDelta;
		}
	}

	return pDeltaV;
}
|
||||
|
||||
// There is a another version of this function in forwardDynamic2.cu which additionally writes
// the link velocity to a global buffer
// Flushes the articulation's deferred impulses into actual link velocities:
// converts the root's accumulated deferred Z into a root velocity change (unless the
// base is fixed), then propagates velocity changes down the link chain, updating each
// link's motion velocity, solver delta-velocity accumulator, and constraint forces,
// and zeroing the per-dof deferred (Q - S^T*Z) accumulators along the way.
// articulation - per-articulation block data (root deferred Z, inverse articulated inertia, ...).
// artiLinks    - per-link block data array, artiLinks[0] being the root.
// artiDofs     - per-dof block data, laid out link by link in link order.
// linkCount    - number of links in the articulation.
// fixBase      - true if the root link is immovable (no root velocity update).
static void __device__ PxcFsFlushVelocity(PxgArticulationBlockData& articulation,
	PxgArticulationBlockLinkData* PX_RESTRICT artiLinks,
	PxgArticulationBlockDofData* PX_RESTRICT artiDofs,
	PxU32 linkCount, bool fixBase, const PxU32 threadIndexInWarp)
{
	Cm::UnAlignedSpatialVector deltaV = Cm::UnAlignedSpatialVector::Zero();
	// Negated: deferred Z is an impulse; the response applies it with opposite sign.
	Cm::UnAlignedSpatialVector deferredZ = -loadSpatialVector(articulation.mRootDeferredZ, threadIndexInWarp);
	if (!fixBase)
	{
		//ArticulationLink& link = links[0];

		// PT: preload data
		const Cm::UnAlignedSpatialVector motionVelocity0 = loadSpatialVector(artiLinks[0].mMotionVelocity, threadIndexInWarp);
		const Cm::UnAlignedSpatialVector solverSpatialDeltaVel0 = loadSpatialVector(artiLinks[0].mSolverSpatialDeltaVel, threadIndexInWarp);

		Dy::SpatialMatrix invInertia;
		loadSpatialMatrix(articulation.mInvSpatialArticulatedInertia, threadIndexInWarp, invInertia);
		//deltaV = invInertia * (-loadSpatialVector(artiLinks[0].mDeferredZ, threadIndexInWarp));

		// Root velocity change = articulated inverse inertia * accumulated (negated) impulse.
		deltaV = invInertia * deferredZ;

		//motionVelocities[0] += deltaV[0];
		storeSpatialVector(artiLinks[0].mMotionVelocity, motionVelocity0 + deltaV, threadIndexInWarp);
		//storeSpatialVector(artiLinks[0].mDeferredZ, Cm::UnAlignedSpatialVector::Zero(), threadIndexInWarp);
		// The deferred impulse has been consumed - reset the accumulator.
		storeSpatialVector(articulation.mRootDeferredZ, Cm::UnAlignedSpatialVector::Zero(), threadIndexInWarp);
		storeSpatialVector(artiLinks[0].mSolverSpatialDeltaVel, solverSpatialDeltaVel0 + deltaV, threadIndexInWarp);
	}

	// Cache the root delta so children with non-contiguous parents can reload it below.
	storeSpatialVector(artiLinks[0].mScratchDeltaV, deltaV, threadIndexInWarp);
	addSpatialVector(artiLinks[0].mConstraintForces, deferredZ, threadIndexInWarp);

	storeSpatialVector(articulation.mCommonLinkDeltaVelocity, Cm::UnAlignedSpatialVector::Zero(), threadIndexInWarp);

	PxgArticulationBlockDofData* dofs = artiDofs;

	if (linkCount > 1)
	{
		// Software pipelining: preload link i+1's data while processing link i.
		Cm::UnAlignedSpatialVector nextMotionV = loadSpatialVector(artiLinks[1].mMotionVelocity, threadIndexInWarp);
		PxU32 nextNbDofs = artiLinks[1].mDofs[threadIndexInWarp];
		PxU32 nextParent = artiLinks[1].mParents[threadIndexInWarp];
		//Cm::UnAlignedSpatialVector nextDeferredZ = loadSpatialVector(artiLinks[1].mDeferredZ, threadIndexInWarp);

		for (PxU32 i = 1; i < linkCount; i++)
		{
			PxgArticulationBlockLinkData& tLink = artiLinks[i];
			const PxU32 nbDofs = nextNbDofs;
			const PxU32 parent = nextParent;

			const Cm::UnAlignedSpatialVector preloadedConstraintForces = loadSpatialVector(tLink.mConstraintForces, threadIndexInWarp);
			const Cm::UnAlignedSpatialVector preloadedSolverSpatialDeltaVel = loadSpatialVector(tLink.mSolverSpatialDeltaVel, threadIndexInWarp);

			Cm::UnAlignedSpatialVector motionV = nextMotionV;
			//const Cm::UnAlignedSpatialVector deferredZ = nextDeferredZ;

			//storeSpatialVector(tLink.mDeferredZ, Cm::UnAlignedSpatialVector::Zero(), threadIndexInWarp);

			if ((i + 1) < linkCount)
			{
				nextMotionV = loadSpatialVector(artiLinks[i + 1].mMotionVelocity, threadIndexInWarp);
				nextNbDofs = artiLinks[i + 1].mDofs[threadIndexInWarp];
				nextParent = artiLinks[i + 1].mParents[threadIndexInWarp];
				//nextDeferredZ = loadSpatialVector(artiLinks[i + 1].mDeferredZ, threadIndexInWarp);
			}

			// Links are stored in breadth/depth order; when the parent is not the
			// previous link, reload its cached delta instead of reusing deltaV.
			if (parent != (i - 1))
				deltaV = loadSpatialVector(artiLinks[parent].mScratchDeltaV, threadIndexInWarp);

			// Propagate across the joint; also updates this joint's velocities in place.
			deltaV = propagateAccelerationW(tLink, dofs, nbDofs, deltaV, threadIndexInWarp);

			//Accumulate the DeltaVel arising from solver impulses applied to this link.
			storeSpatialVector(tLink.mSolverSpatialDeltaVel, preloadedSolverSpatialDeltaVel + deltaV, threadIndexInWarp);

			//zeroing mDeferredQstZ
			for (PxU32 ind = 0; ind < nbDofs; ++ind)
			{
				dofs[ind].mDeferredQstZ[threadIndexInWarp] = 0.f;
			}

			motionV += deltaV;

			storeSpatialVector(tLink.mScratchDeltaV, deltaV, threadIndexInWarp);

			//const PxTransform& tBody2World = poses[i];
			storeSpatialVector(tLink.mMotionVelocity, motionV, threadIndexInWarp);

			storeSpatialVector(tLink.mConstraintForces, preloadedConstraintForces + deltaV, threadIndexInWarp);

			// Advance to the next link's dof slice.
			dofs += nbDofs;
		}
	}
}
|
||||
|
||||
#endif
|
||||
|
||||
387
engine/third_party/physx/source/gpuarticulation/src/CUDA/articulationImpulseResponse.cuh
vendored
Normal file
387
engine/third_party/physx/source/gpuarticulation/src/CUDA/articulationImpulseResponse.cuh
vendored
Normal file
@@ -0,0 +1,387 @@
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions
|
||||
// are met:
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
// * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
// contributors may be used to endorse or promote products derived
|
||||
// from this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
|
||||
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
|
||||
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
|
||||
|
||||
|
||||
#ifndef __ARTI_IMPULSE_RESPONSE_CUH__
|
||||
#define __ARTI_IMPULSE_RESPONSE_CUH__
|
||||
|
||||
#include "PxgSolverBody.h"
|
||||
#include "PxSpatialMatrix.h"
|
||||
#include "PxgArticulation.h"
|
||||
#include "PxgArticulationLink.h"
|
||||
#include "DyFeatherstoneArticulationJointData.h"
|
||||
#include "foundation/PxPreprocessor.h"
|
||||
#include "solver/PxSolverDefs.h"
|
||||
#include "MemoryAllocator.cuh"
|
||||
#include "articulationDynamic.cuh"
|
||||
#include "DyFeatherstoneArticulation.h"
|
||||
#include "solverBlock.cuh"
|
||||
|
||||
using namespace physx;
|
||||
|
||||
// Returns the index of the lowest set bit of val, or 0 when val is zero.
// Note the val == 0 result is indistinguishable from "bit 0 set"; callers
// are expected to handle the empty case themselves if it matters.
static __device__ PX_FORCE_INLINE PxU32 articulationLowestSetBit(ArticulationBitField val)
{
	if (val == 0)
		return 0;
	// __ffsll is 1-based (returns 1 for bit 0), hence the -1.
	return __ffsll(val) - 1;
}
|
||||
|
||||
|
||||
// Returns the index of the highest set bit of val.
// Precondition: val != 0. For val == 0, __clzll yields 64 and 63 - 64 wraps
// to 0xFFFFFFFF in the unsigned return type; visible callers in this file
// guard with word != 0 before calling.
static __device__ PX_FORCE_INLINE PxU32 articulationHighestSetBit(ArticulationBitField val)
{
	return 63 - __clzll(val);
}
|
||||
|
||||
// Same propagation as the joint-velocity-updating propagateAccelerationW overload in
// this module family, except that dofData is read-only: the deferred (Q - S^T*Z)
// accumulators are consumed into the returned delta but joint velocities are NOT
// written back.
// linkData - per-link block data (supplies the child-to-parent offset mRw_*).
// dofData  - per-dof block data for this joint (read-only).
// dofCount - number of joint dofs (<= 3).
// hDeltaV  - incoming spatial velocity change at the child.
// Returns the child link's total spatial velocity change.
static __device__ Cm::UnAlignedSpatialVector propagateAccelerationWNoJVelUpdate(
	const PxgArticulationBlockLinkData& linkData,
	const PxgArticulationBlockDofData* const dofData,
	const PxU32 dofCount,
	const Cm::UnAlignedSpatialVector& hDeltaV,
	const PxU32 threadIndexInWarp)
{
	// Child-to-parent offset, stored component-wise per lane.
	const float c2px = linkData.mRw_x[threadIndexInWarp];
	const float c2py = linkData.mRw_y[threadIndexInWarp];
	const float c2pz = linkData.mRw_z[threadIndexInWarp];
	Cm::UnAlignedSpatialVector pDeltaV = Dy::FeatherstoneArticulation::translateSpatialVector(PxVec3(-c2px, -c2py, -c2pz), hDeltaV); //parent velocity change

	float3 invStIsT[3];
	PxReal tJointDelta[3] = { 0.f, 0.f, 0.f };
	Cm::UnAlignedSpatialVector isW[3];
	Cm::UnAlignedSpatialVector motionMatrix[3];

	// the split into three separate loops is an optimization that allows dispatching the loads as early as possible.
#pragma unroll 3
	for (PxU32 ind = 0; ind < 3; ++ind)
	{
		if (ind < dofCount)
		{
			isW[ind] = loadSpatialVector(dofData[ind].mIsW, threadIndexInWarp);
			invStIsT[ind] = make_float3(dofData[ind].mInvStIsT_x[threadIndexInWarp], dofData[ind].mInvStIsT_y[threadIndexInWarp], dofData[ind].mInvStIsT_z[threadIndexInWarp]);
			// Seed tJointDelta with the deferred (Q - S^T*Z) accumulator (read-only here).
			tJointDelta[ind] = dofData[ind].mDeferredQstZ[threadIndexInWarp];
			motionMatrix[ind] = loadSpatialVector(dofData[ind].mWorldMotionMatrix, threadIndexInWarp);
		}
	}

	// tJointDelta[ind] = (Q - S^T*Z) - (I*S).dot(parentDeltaV)
#pragma unroll 3
	for (PxU32 ind = 0; ind < 3; ++ind)
	{
		if (ind < dofCount)
		{
			tJointDelta[ind] -= isW[ind].innerProduct(pDeltaV);
		}
	}

#pragma unroll 3
	for (PxU32 ind = 0; ind < 3; ++ind)
	{
		if (ind < dofCount)
		{
			// jDelta = row ind of invStIsT multiplied by the joint-space delta vector.
			const PxReal jDelta = invStIsT[ind].x * tJointDelta[0] + invStIsT[ind].y * tJointDelta[1]
				+ invStIsT[ind].z * tJointDelta[2];

			pDeltaV += motionMatrix[ind] * jDelta;
		}
	}

	return pDeltaV;
}
|
||||
|
||||
|
||||
// Gathers the per-slab impulses accumulated against this articulation's links,
// scales them (averaging across dirty slabs when no explicit scale is given),
// accumulates them into each link's scratch impulse, then propagates all scratch
// impulses up the tree from the deepest dirty link to the root, folding the result
// into the articulation's root deferred Z.
// isSlabDirty  - per-slab pair of dirty link indices (0xFFFFFFFF = not dirty); reset on exit.
// impulses     - per-slab, per-link impulse buffer, zeroed as it is consumed.
// scale        - impulse scale; 0.0f (default) means "average over dirty slabs" (not momentum-conserving).
static __device__ PX_FORCE_INLINE void averageLinkImpulsesAndPropagate(uint2* PX_RESTRICT isSlabDirty, const Cm::UnAlignedSpatialVector* PX_RESTRICT impulses,
	PxgArticulationBlockData& PX_RESTRICT articulationBlock, PxgArticulationBlockLinkData* PX_RESTRICT artiLinks,
	PxgArticulationBlockDofData* const PX_RESTRICT artiDofs,
	const PxU32 articulationId, const PxU32 maxLinks, const PxU32 nbArticulations, const PxU32 nbSlabs,
	const PxU32 nbLinks, const PxU32 threadIndexInWarp, PxReal scale = 0.0f)
{
	const PxU32 slabStepSize = maxLinks * nbArticulations;

	if (scale == 0.0f) // When scale is not input, compute the scale here. This method is not momentum-conserving.
	{
		scale = 1.0f;
		if (nbSlabs > 1)
		{
			// Count slabs that touched this articulation, then average over them.
			PxReal count = 0.0f;
			for (PxU32 s = 0, slabDirtyIndex = articulationId; s < nbSlabs; ++s, slabDirtyIndex += nbArticulations)
			{
				const uint2 dirty = isSlabDirty[slabDirtyIndex];
				if (dirty.x != 0xFFFFFFFF || dirty.y != 0xFFFFFFFF)
				{
					count += 1.0f;
				}
			}

			scale = count > 1.0f ? 1.0f / count : 1.0f;
		}
	}

	// Track the deepest link that received an impulse; propagation starts from there.
	PxU32 maxIndex = articulationBlock.mLinkWithDeferredImpulse[threadIndexInWarp];
	for (PxU32 s = 0, slabOffset = articulationId, slabDirtyIndex = articulationId; s < nbSlabs; ++s, slabOffset += slabStepSize, slabDirtyIndex += nbArticulations)
	{
		const uint2 dirtyIndex2 = isSlabDirty[slabDirtyIndex];
		// Two dirty link slots per slab: first pass uses .x, second uses .y.
		for(PxU32 i = 0, dirtyIndex = dirtyIndex2.x; i < 2; ++i, dirtyIndex = dirtyIndex2.y)
		{
			//PxU32 dirtyIndex = i == 0 ? dirtyIndex2.x : dirtyIndex2.y;
			if (dirtyIndex != 0xFFFFFFFF)
			{
				const Cm::UnAlignedSpatialVector preloadedScratchImpulse = loadSpatialVector(artiLinks[dirtyIndex].mScratchImpulse, threadIndexInWarp);

				//Get the index of the dirty slab...
				const PxU32 deltaIdx = slabOffset + dirtyIndex * nbArticulations;

				//Cm::UnAlignedSpatialVector impulse = impulses[deltaIdx] * scale;
				Cm::UnAlignedSpatialVector impulse;

				// Read the 24-byte spatial vector with two vector loads (float4 + float2),
				// choosing the split by the element's 16-byte alignment.
				float4* f4;
				float2* f2;

				size_t ptr = reinterpret_cast<size_t>(&impulses[deltaIdx]);

				if (ptr & 0xf)
				{
					// Element starts on an 8-byte (but not 16-byte) boundary: float2 first, then float4.
					f2 = reinterpret_cast<float2*>(ptr);
					f4 = reinterpret_cast<float4*>(f2 + 1);

					const float2 v0 = *f2;
					const float4 v1 = *f4;

					impulse.top.x = v0.x; impulse.top.y = v0.y; impulse.top.z = v1.x;
					impulse.bottom.x = v1.y; impulse.bottom.y = v1.z; impulse.bottom.z = v1.w;
				}
				else
				{
					// 16-byte aligned element: float4 first, then float2.
					f4 = reinterpret_cast<float4*>(ptr);
					f2 = reinterpret_cast<float2*>(f4 + 1);
					const float4 v0 = *f4;
					const float2 v1 = *f2;
					impulse.top.x = v0.x; impulse.top.y = v0.y; impulse.top.z = v0.z;
					impulse.bottom.x = v0.w; impulse.bottom.y = v1.x; impulse.bottom.z = v1.y;
				}

				// Consume the impulse: zero the buffer through the same two vector stores.
				*f4 = make_float4(0.f);
				*f2 = make_float2(0.f);

				//impulses[deltaIdx] = Cm::UnAlignedSpatialVector::Zero();
				storeSpatialVector(artiLinks[dirtyIndex].mScratchImpulse, preloadedScratchImpulse + impulse*scale, threadIndexInWarp);
				maxIndex = PxMax(dirtyIndex, maxIndex);
			}
		}
		isSlabDirty[slabDirtyIndex] = make_uint2(0xFFFFFFFF, 0xFFFFFFFF);
	}

	{
		const Cm::UnAlignedSpatialVector preloadedRootDeferredZ = loadSpatialVector(articulationBlock.mRootDeferredZ, threadIndexInWarp);

		// maxIndex == 0 means only the root was touched - nothing to propagate.
		if (maxIndex)
		{
			PxgArticulationBlockDofData* PX_RESTRICT dofData = &artiDofs[artiLinks[maxIndex].mJointOffset[threadIndexInWarp]];

			// Software pipelining: preload the next link's data while processing the current one.
			PxU32 nextParent = artiLinks[maxIndex].mParents[threadIndexInWarp];
			PxU32 nextDofs = artiLinks[maxIndex].mDofs[threadIndexInWarp];
			float nextChild2Parentx = artiLinks[maxIndex].mRw_x[threadIndexInWarp];
			float nextChild2Parenty = artiLinks[maxIndex].mRw_y[threadIndexInWarp];
			float nextChild2Parentz = artiLinks[maxIndex].mRw_z[threadIndexInWarp];
			//Cm::UnAlignedSpatialVector nextDeferredZ = loadSpatialVector(artiLinks[maxIndex].mDeferredZ, threadIndexInWarp);

			// Walk from the deepest dirty link toward the root, pushing each link's
			// scratch impulse onto its parent.
			for (PxU32 linkID = maxIndex; linkID > 0; linkID--)
			{
				PxgArticulationBlockLinkData& linkData = artiLinks[linkID];

				//Can't preload because this could have been written to
				const Cm::UnAlignedSpatialVector Z = loadSpatialVector(linkData.mScratchImpulse, threadIndexInWarp);
				const Cm::UnAlignedSpatialVector solverSpatialImpulse = loadSpatialVector(linkData.mSolverSpatialImpulse, threadIndexInWarp);

				const PxU32 parent = nextParent;
				const Cm::UnAlignedSpatialVector parentScratchImpulse = loadSpatialVector(artiLinks[parent].mScratchImpulse, threadIndexInWarp);
				const float child2Parentx = nextChild2Parentx;
				const float child2Parenty = nextChild2Parenty;
				const float child2Parentz = nextChild2Parentz;
				const PxU32 dofCount = nextDofs;
				//Cm::UnAlignedSpatialVector deferredZ = nextDeferredZ;

				if (linkID > 1)
				{
					PxU32 nextIndex = linkID - 1;
					nextParent = artiLinks[nextIndex].mParents[threadIndexInWarp];
					nextDofs = artiLinks[nextIndex].mDofs[threadIndexInWarp];
					nextChild2Parentx = artiLinks[nextIndex].mRw_x[threadIndexInWarp];
					nextChild2Parenty = artiLinks[nextIndex].mRw_y[threadIndexInWarp];
					nextChild2Parentz = artiLinks[nextIndex].mRw_z[threadIndexInWarp];
					//nextDeferredZ = loadSpatialVector(artiLinks[nextIndex].mDeferredZ, threadIndexInWarp);
				}

				const Cm::UnAlignedSpatialVector propagatedZ = propagateImpulseW_0(PxVec3(child2Parentx, child2Parenty, child2Parentz),
					dofData, Z,
					dofCount, threadIndexInWarp);

				//Accumulate the solver impulses applied to this link.
				storeSpatialVector(linkData.mSolverSpatialImpulse, solverSpatialImpulse + Z, threadIndexInWarp);

				//KS - we should be able to remove mImpulses once we are 100% certain that we will not have any deferredZ residuals
				storeSpatialVector(artiLinks[parent].mScratchImpulse, parentScratchImpulse + propagatedZ, threadIndexInWarp);
				storeSpatialVector(linkData.mScratchImpulse, Cm::UnAlignedSpatialVector::Zero(), threadIndexInWarp);

				// Step back to the previous link's dof slice (dofs are laid out in link order).
				dofData -= nextDofs;

			}
		}
		// PT: we can't preload artiLinks[0].mScratchImpulse, as it's modified by the above loop
		const Cm::UnAlignedSpatialVector preloadedRootScratchImpulse = loadSpatialVector(artiLinks[0].mScratchImpulse, threadIndexInWarp);
		storeSpatialVector(artiLinks[0].mScratchImpulse, Cm::UnAlignedSpatialVector::Zero(), threadIndexInWarp);
		storeSpatialVector(articulationBlock.mRootDeferredZ, preloadedRootScratchImpulse + preloadedRootDeferredZ, threadIndexInWarp);
	}
}
|
||||
|
||||
//This method works out the largest index (furthest down the tree) link that we can propagate to such that we can accumulate
//all impulses acting on the articulation into a single impulse value. We mark this link as dirty and terminate.
//The subsequent kernel requesting velocities uses this cached information to compute the velocity for the desired link.
//It is hoped that spatial locality of contacts in many cases will result in this avoiding doing a brute force propagate
//to all links in the articulation
// Unlike averageLinkImpulsesAndPropagate above, the caller supplies the scale,
// the set of dirty links to propagate is described by per-partition path-to-root
// bitfields, and propagation stops at commonNode instead of the root - the
// commonNode's delta velocity is cached in articulationBlock for later kernels.
static __device__ void averageLinkImpulsesAndPropagate2(uint2* PX_RESTRICT isSlabDirty, Cm::UnAlignedSpatialVector* PX_RESTRICT impulses,
	PxgArticulationBlockData& PX_RESTRICT articulationBlock, PxgArticulationBlockLinkData* PX_RESTRICT artiLinks,
	PxgArticulationBlockDofData* const PX_RESTRICT artiDofs,
	const PxU32 articulationId, const PxU32 maxLinks, const PxU32 nbArticulations, const PxU32 nbSlabs,
	const PxU32 nbLinks, const PxU32 threadIndexInWarp, const PxReal scale,
	PxgArticulationBitFieldStackData* PX_RESTRICT pathToRootPerPartition, const PxU32 wordSize,
	const PxU32 commonNode, const PxU32 dirtyFlags)
{

	if (dirtyFlags & PxgArtiStateDirtyFlag::eHAS_IMPULSES)
	{
		const PxU32 slabStepSize = maxLinks * nbArticulations;

		for (PxU32 s = 0, slabOffset = articulationId, slabDirtyIndex = articulationId; s < nbSlabs; ++s, slabOffset += slabStepSize, slabDirtyIndex += nbArticulations)
		{
			const uint2 dirtyIndex2 = isSlabDirty[slabDirtyIndex];
			// Two dirty link slots per slab: first pass uses .x, second uses .y.
			for (PxU32 i = 0, dirtyIndex = dirtyIndex2.x; i < 2; ++i, dirtyIndex = dirtyIndex2.y)
			{
				//PxU32 dirtyIndex = i == 0 ? dirtyIndex2.x : dirtyIndex2.y;
				if (dirtyIndex != 0xFFFFFFFF)
				{
					const Cm::UnAlignedSpatialVector preloadedScratchImpulse = loadSpatialVector(artiLinks[dirtyIndex].mScratchImpulse, threadIndexInWarp);

					//Get the index of the dirty slab...
					const PxU32 deltaIdx = slabOffset + dirtyIndex * nbArticulations;
					Cm::UnAlignedSpatialVector impulse;

					// Read the 24-byte spatial vector with two vector loads (float4 + float2),
					// choosing the split by the element's 16-byte alignment.
					float4* f4;
					float2* f2;

					size_t ptr = reinterpret_cast<size_t>(&impulses[deltaIdx]);

					if (ptr & 0xf)
					{
						// Element starts on an 8-byte (but not 16-byte) boundary: float2 first, then float4.
						f2 = reinterpret_cast<float2*>(ptr);
						f4 = reinterpret_cast<float4*>(f2 + 1);

						const float2 v0 = *f2;
						const float4 v1 = *f4;

						impulse.top.x = v0.x; impulse.top.y = v0.y; impulse.top.z = v1.x;
						impulse.bottom.x = v1.y; impulse.bottom.y = v1.z; impulse.bottom.z = v1.w;
					}
					else
					{
						// 16-byte aligned element: float4 first, then float2.
						f4 = reinterpret_cast<float4*>(ptr);
						f2 = reinterpret_cast<float2*>(f4 + 1);
						const float4 v0 = *f4;
						const float2 v1 = *f2;
						impulse.top.x = v0.x; impulse.top.y = v0.y; impulse.top.z = v0.z;
						impulse.bottom.x = v0.w; impulse.bottom.y = v1.x; impulse.bottom.z = v1.y;
					}

					// Consume the impulse: zero the buffer through the same two vector stores.
					*f4 = make_float4(0.f);
					*f2 = make_float2(0.f);

					impulse = impulse*scale;

					storeSpatialVector(artiLinks[dirtyIndex].mScratchImpulse, preloadedScratchImpulse + impulse, threadIndexInWarp);
				}
			}
			isSlabDirty[slabDirtyIndex] = make_uint2(0xFFFFFFFF, 0xFFFFFFFF);
		}
	}

	// Traverse up from last to front...
	// Each 64-bit word of the per-partition bitfield covers 64 link indices;
	// scan words from the highest index range downward.
	for (PxI32 j = wordSize-1, bitOffset = (wordSize-1)*64; j >= 0; j--, bitOffset -= 64)
	{
		PxU64 word = pathToRootPerPartition[j].bitField[threadIndexInWarp];
		if (word != 0)
		{
			// Process links deepest-first within this word.
			while (word)
			{
				PxU32 bitIndex = articulationHighestSetBit(word);

				const PxU32 index = bitIndex + bitOffset;

				if (index == commonNode)
					break; //We reached the common node so terminate the traversal

				word &= (~(1ull << bitIndex)); //Clear this bit

				PxgArticulationBlockLinkData& linkData = artiLinks[index];

				PxgArticulationBlockDofData* PX_RESTRICT dofData = &artiDofs[linkData.mJointOffset[threadIndexInWarp]];

				const PxU32 parent = linkData.mParents[threadIndexInWarp];
				const float child2Parentx = linkData.mRw_x[threadIndexInWarp];
				const float child2Parenty = linkData.mRw_y[threadIndexInWarp];
				const float child2Parentz = linkData.mRw_z[threadIndexInWarp];
				const PxU32 dofCount = linkData.mDofs[threadIndexInWarp];

				const Cm::UnAlignedSpatialVector Z = loadSpatialVector(linkData.mScratchImpulse, threadIndexInWarp);
				const Cm::UnAlignedSpatialVector parentScratchImpulse = loadSpatialVector(artiLinks[parent].mScratchImpulse, threadIndexInWarp);
				const Cm::UnAlignedSpatialVector solverSpatialImpulse = loadSpatialVector(linkData.mSolverSpatialImpulse, threadIndexInWarp);

				// Push this link's scratch impulse across the joint onto its parent.
				const Cm::UnAlignedSpatialVector propagatedZ = propagateImpulseW_0(PxVec3(child2Parentx, child2Parenty, child2Parentz),
					dofData, Z,
					dofCount, threadIndexInWarp);

				//Accumulate the solver impulses applied to this link.
				storeSpatialVector(linkData.mSolverSpatialImpulse, solverSpatialImpulse + Z, threadIndexInWarp);

				//KS - we should be able to remove mImpulses once we are 100% certain that we will not have any deferredZ residuals
				storeSpatialVector(artiLinks[parent].mScratchImpulse, parentScratchImpulse + propagatedZ, threadIndexInWarp);
				storeSpatialVector(linkData.mScratchImpulse, Cm::UnAlignedSpatialVector::Zero(), threadIndexInWarp);
			}
		}
	}

	//(1) Compute updated link velocity...
	PxSpatialMatrix mat;
	loadSpatialMatrix(artiLinks[commonNode].mSpatialResponseMatrix, threadIndexInWarp, mat);

	// deltaV = response matrix applied to the (negated) impulse gathered at the common node.
	const Cm::UnAlignedSpatialVector deltaV = mat * (-loadSpatialVector(artiLinks[commonNode].mScratchImpulse, threadIndexInWarp));
	// It is important that we DO NOT reset the artiLinks[commonNode].mScratchImpulse to zero here because it has not been propagated
	// to the root (contrary to the subtree of the commonNode, whose impulses we just propagated).
	// The resetting of the artiLinks[commonNode].mScratchImpulse will be done eventually by the averageLinkImpulsesAndPropagate function
	// that goes all the way to the root.

	storeSpatialVector(articulationBlock.mCommonLinkDeltaVelocity, deltaV, threadIndexInWarp);
	articulationBlock.mLinkWithDeferredImpulse[threadIndexInWarp] = commonNode;
}
|
||||
|
||||
#endif
|
||||
4230
engine/third_party/physx/source/gpuarticulation/src/CUDA/forwardDynamic2.cu
vendored
Normal file
4230
engine/third_party/physx/source/gpuarticulation/src/CUDA/forwardDynamic2.cu
vendored
Normal file
File diff suppressed because it is too large
Load Diff
4012
engine/third_party/physx/source/gpuarticulation/src/CUDA/internalConstraints2.cu
vendored
Normal file
4012
engine/third_party/physx/source/gpuarticulation/src/CUDA/internalConstraints2.cu
vendored
Normal file
File diff suppressed because it is too large
Load Diff
1149
engine/third_party/physx/source/gpuarticulation/src/CUDA/inverseDynamic.cu
vendored
Normal file
1149
engine/third_party/physx/source/gpuarticulation/src/CUDA/inverseDynamic.cu
vendored
Normal file
File diff suppressed because it is too large
Load Diff
1932
engine/third_party/physx/source/gpuarticulation/src/PxgArticulationCore.cpp
vendored
Normal file
1932
engine/third_party/physx/source/gpuarticulation/src/PxgArticulationCore.cpp
vendored
Normal file
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user