977 lines
40 KiB
C
977 lines
40 KiB
C
|
|
// Redistribution and use in source and binary forms, with or without
|
||
|
|
// modification, are permitted provided that the following conditions
|
||
|
|
// are met:
|
||
|
|
// * Redistributions of source code must retain the above copyright
|
||
|
|
// notice, this list of conditions and the following disclaimer.
|
||
|
|
// * Redistributions in binary form must reproduce the above copyright
|
||
|
|
// notice, this list of conditions and the following disclaimer in the
|
||
|
|
// documentation and/or other materials provided with the distribution.
|
||
|
|
// * Neither the name of NVIDIA CORPORATION nor the names of its
|
||
|
|
// contributors may be used to endorse or promote products derived
|
||
|
|
// from this software without specific prior written permission.
|
||
|
|
//
|
||
|
|
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
|
||
|
|
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||
|
|
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||
|
|
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||
|
|
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||
|
|
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||
|
|
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||
|
|
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||
|
|
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||
|
|
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||
|
|
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||
|
|
//
|
||
|
|
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
|
||
|
|
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
|
||
|
|
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
|
||
|
|
|
||
|
|
#ifndef PXG_ARTICULATION_H
|
||
|
|
#define PXG_ARTICULATION_H
|
||
|
|
|
||
|
|
#include "foundation/PxSimpleTypes.h"
|
||
|
|
#include "PxArticulationReducedCoordinate.h"
|
||
|
|
#include "PxgArticulationLink.h"
|
||
|
|
#include "PxgArticulationTendon.h"
|
||
|
|
#include "PxgCudaBuffer.h"
|
||
|
|
#include "PxSpatialMatrix.h"
|
||
|
|
#include "DyFeatherstoneArticulation.h"
|
||
|
|
#include "foundation/PxUserAllocated.h"
|
||
|
|
#include "vector_types.h"
|
||
|
|
|
||
|
|
namespace physx
|
||
|
|
{
|
||
|
|
class PxGpuSpatialTendonData;
|
||
|
|
class PxGpuFixedTendonData;
|
||
|
|
|
||
|
|
namespace Dy
|
||
|
|
{
|
||
|
|
struct ArticulationJointCore;
|
||
|
|
class ArticulationJointCoreData;
|
||
|
|
struct ArticulationInternalConstraint;
|
||
|
|
struct SpatialSubspaceMatrix;
|
||
|
|
}
|
||
|
|
|
||
|
|
//We load/store to Cm::SpatialMatrix rather than PxSpatialMatrix.
|
||
|
|
//This exploits the symmetry of the spatial matrix to avoid storing/loading the bottom-right,
|
||
|
|
//which is the transpose of the top-left
|
||
|
|
struct PxgSpatialMatrixBlock
|
||
|
|
{
|
||
|
|
float4 columns[7][32];
|
||
|
|
};
|
||
|
|
|
||
|
|
//We load/store to a PxSpatialMatrix. This has a full 6x6 matrix as there is no
|
||
|
|
//symmetry as described in the above struct
|
||
|
|
struct PxgSpatialMatrix6x6Block
|
||
|
|
{
|
||
|
|
float4 columns[9][32];
|
||
|
|
};
|
||
|
|
|
||
|
|
struct PxgMat33Block
|
||
|
|
{
|
||
|
|
float4 mCol0[32];
|
||
|
|
float4 mCol1[32];
|
||
|
|
float mCol2[32];
|
||
|
|
};
|
||
|
|
|
||
|
|
struct PxgSpatialVectorBlock
|
||
|
|
{
|
||
|
|
float4 mTopxyz_bx[32];
|
||
|
|
float2 mbyz[32];
|
||
|
|
};
|
||
|
|
|
||
|
|
struct PxgSpatialTransformBlock
|
||
|
|
{
|
||
|
|
float4 q[32];
|
||
|
|
float4 p[32];
|
||
|
|
};
|
||
|
|
|
||
|
|
|
||
|
|
PX_FORCE_INLINE PX_CUDA_CALLABLE void loadSpatialMatrix(const PxgSpatialMatrixBlock& block, const PxU32 threadIndexInWarp, Dy::SpatialMatrix& mat)
|
||
|
|
{
|
||
|
|
float4 val = block.columns[0][threadIndexInWarp];
|
||
|
|
mat.topLeft[0][0] = val.x; mat.topLeft[0][1] = val.y; mat.topLeft[0][2] = val.z; mat.topLeft[1][0] = val.w;
|
||
|
|
val = block.columns[1][threadIndexInWarp];
|
||
|
|
mat.topLeft[1][1] = val.x; mat.topLeft[1][2] = val.y; mat.topLeft[2][0] = val.z; mat.topLeft[2][1] = val.w;
|
||
|
|
val = block.columns[2][threadIndexInWarp];
|
||
|
|
mat.topLeft[2][2] = val.x; mat.topRight[0][0] = val.y; mat.topRight[0][1] = val.z; mat.topRight[0][2] = val.w;
|
||
|
|
val = block.columns[3][threadIndexInWarp];
|
||
|
|
mat.topRight[1][0] = val.x; mat.topRight[1][1] = val.y; mat.topRight[1][2] = val.z; mat.topRight[2][0] = val.w;
|
||
|
|
val = block.columns[4][threadIndexInWarp];
|
||
|
|
mat.topRight[2][1] = val.x; mat.topRight[2][2] = val.y; mat.bottomLeft[0][0] = val.z; mat.bottomLeft[0][1] = val.w;
|
||
|
|
val = block.columns[5][threadIndexInWarp];
|
||
|
|
mat.bottomLeft[0][2] = val.x; mat.bottomLeft[1][0] = val.y; mat.bottomLeft[1][1] = val.z; mat.bottomLeft[1][2] = val.w;
|
||
|
|
val = block.columns[6][threadIndexInWarp];
|
||
|
|
mat.bottomLeft[2][0] = val.x; mat.bottomLeft[2][1] = val.y; mat.bottomLeft[2][2] = val.z;
|
||
|
|
}
|
||
|
|
|
||
|
|
//This loads a 7-vec matrix in which the bottomRight is equal to the transpose of the top left
|
||
|
|
PX_FORCE_INLINE PX_CUDA_CALLABLE void loadSpatialMatrix(const PxgSpatialMatrixBlock& block, const PxU32 threadIndexInWarp, PxSpatialMatrix& mat)
|
||
|
|
{
|
||
|
|
float4 val = block.columns[0][threadIndexInWarp];
|
||
|
|
mat.column[0][0] = val.x; mat.column[0][1] = val.y; mat.column[0][2] = val.z; mat.column[1][0] = val.w;
|
||
|
|
mat.column[3][3] = val.x; mat.column[4][3] = val.y; mat.column[5][3] = val.z; mat.column[3][4] = val.w;
|
||
|
|
val = block.columns[1][threadIndexInWarp];
|
||
|
|
mat.column[1][1] = val.x; mat.column[1][2] = val.y; mat.column[2][0] = val.z; mat.column[2][1] = val.w;
|
||
|
|
mat.column[4][4] = val.x; mat.column[5][4] = val.y; mat.column[3][5] = val.z; mat.column[4][5] = val.w;
|
||
|
|
val = block.columns[2][threadIndexInWarp];
|
||
|
|
mat.column[2][2] = val.x; mat.column[0][3] = val.y; mat.column[0][4] = val.z; mat.column[0][5] = val.w;
|
||
|
|
mat.column[5][5] = val.x;
|
||
|
|
val = block.columns[3][threadIndexInWarp];
|
||
|
|
mat.column[1][3] = val.x; mat.column[1][4] = val.y; mat.column[1][5] = val.z; mat.column[2][3] = val.w;
|
||
|
|
val = block.columns[4][threadIndexInWarp];
|
||
|
|
mat.column[2][4] = val.x; mat.column[2][5] = val.y; mat.column[3][0] = val.z; mat.column[3][1] = val.w;
|
||
|
|
val = block.columns[5][threadIndexInWarp];
|
||
|
|
mat.column[3][2] = val.x; mat.column[4][0] = val.y; mat.column[4][1] = val.z; mat.column[4][2] = val.w;
|
||
|
|
val = block.columns[6][threadIndexInWarp];
|
||
|
|
mat.column[5][0] = val.x; mat.column[5][1] = val.y; mat.column[5][2] = val.z;
|
||
|
|
}
|
||
|
|
|
||
|
|
PX_FORCE_INLINE PX_CUDA_CALLABLE void storeSpatialMatrix(PxgSpatialMatrixBlock& block, const PxU32 threadIndexInWarp, const Dy::SpatialMatrix& mat)
|
||
|
|
{
|
||
|
|
block.columns[0][threadIndexInWarp] = make_float4(mat.topLeft[0][0], mat.topLeft[0][1], mat.topLeft[0][2], mat.topLeft[1][0]);
|
||
|
|
block.columns[1][threadIndexInWarp] = make_float4(mat.topLeft[1][1], mat.topLeft[1][2], mat.topLeft[2][0], mat.topLeft[2][1]);
|
||
|
|
block.columns[2][threadIndexInWarp] = make_float4(mat.topLeft[2][2], mat.topRight[0][0], mat.topRight[0][1], mat.topRight[0][2]);
|
||
|
|
block.columns[3][threadIndexInWarp] = make_float4(mat.topRight[1][0], mat.topRight[1][1], mat.topRight[1][2], mat.topRight[2][0]);
|
||
|
|
block.columns[4][threadIndexInWarp] = make_float4(mat.topRight[2][1], mat.topRight[2][2], mat.bottomLeft[0][0], mat.bottomLeft[0][1]);
|
||
|
|
block.columns[5][threadIndexInWarp] = make_float4(mat.bottomLeft[0][2], mat.bottomLeft[1][0], mat.bottomLeft[1][1], mat.bottomLeft[1][2]);
|
||
|
|
block.columns[6][threadIndexInWarp] = make_float4(mat.bottomLeft[2][0], mat.bottomLeft[2][1], mat.bottomLeft[2][2], 0.f);
|
||
|
|
}
|
||
|
|
|
||
|
|
PX_FORCE_INLINE PX_CUDA_CALLABLE void loadSpatialMatrix(const PxgSpatialMatrix6x6Block& block, const PxU32 threadIndexInWarp, PxSpatialMatrix& mat)
|
||
|
|
{
|
||
|
|
float4 val = block.columns[0][threadIndexInWarp];
|
||
|
|
mat.column[0][0] = val.x; mat.column[0][1] = val.y; mat.column[0][2] = val.z; mat.column[0][3] = val.w;
|
||
|
|
val = block.columns[1][threadIndexInWarp];
|
||
|
|
mat.column[0][4] = val.x; mat.column[0][5] = val.y; mat.column[1][0] = val.z; mat.column[1][1] = val.w;
|
||
|
|
val = block.columns[2][threadIndexInWarp];
|
||
|
|
mat.column[1][2] = val.x; mat.column[1][3] = val.y; mat.column[1][4] = val.z; mat.column[1][5] = val.w;
|
||
|
|
val = block.columns[3][threadIndexInWarp];
|
||
|
|
mat.column[2][0] = val.x; mat.column[2][1] = val.y; mat.column[2][2] = val.z; mat.column[2][3] = val.w;
|
||
|
|
val = block.columns[4][threadIndexInWarp];
|
||
|
|
mat.column[2][4] = val.x; mat.column[2][5] = val.y; mat.column[3][0] = val.z; mat.column[3][1] = val.w;
|
||
|
|
val = block.columns[5][threadIndexInWarp];
|
||
|
|
mat.column[3][2] = val.x; mat.column[3][3] = val.y; mat.column[3][4] = val.z; mat.column[3][5] = val.w;
|
||
|
|
val = block.columns[6][threadIndexInWarp];
|
||
|
|
mat.column[4][0] = val.x; mat.column[4][1] = val.y; mat.column[4][2] = val.z; mat.column[4][3] = val.w;
|
||
|
|
val = block.columns[7][threadIndexInWarp];
|
||
|
|
mat.column[4][4] = val.x; mat.column[4][5] = val.y; mat.column[5][0] = val.z; mat.column[5][1] = val.w;
|
||
|
|
val = block.columns[8][threadIndexInWarp];
|
||
|
|
mat.column[5][2] = val.x; mat.column[5][3] = val.y; mat.column[5][4] = val.z; mat.column[5][5] = val.w;
|
||
|
|
}
|
||
|
|
|
||
|
|
PX_FORCE_INLINE PX_CUDA_CALLABLE void storeSpatialMatrix(PxgSpatialMatrix6x6Block& block, const PxU32 threadIndexInWarp, const PxSpatialMatrix& mat)
|
||
|
|
{
|
||
|
|
block.columns[0][threadIndexInWarp] = make_float4(mat.column[0][0], mat.column[0][1], mat.column[0][2], mat.column[0][3]);
|
||
|
|
block.columns[1][threadIndexInWarp] = make_float4(mat.column[0][4], mat.column[0][5], mat.column[1][0], mat.column[1][1]);
|
||
|
|
block.columns[2][threadIndexInWarp] = make_float4(mat.column[1][2], mat.column[1][3], mat.column[1][4], mat.column[1][5]);
|
||
|
|
block.columns[3][threadIndexInWarp] = make_float4(mat.column[2][0], mat.column[2][1], mat.column[2][2], mat.column[2][3]);
|
||
|
|
block.columns[4][threadIndexInWarp] = make_float4(mat.column[2][4], mat.column[2][5], mat.column[3][0], mat.column[3][1]);
|
||
|
|
block.columns[5][threadIndexInWarp] = make_float4(mat.column[3][2], mat.column[3][3], mat.column[3][4], mat.column[3][5]);
|
||
|
|
block.columns[6][threadIndexInWarp] = make_float4(mat.column[4][0], mat.column[4][1], mat.column[4][2], mat.column[4][3]);
|
||
|
|
block.columns[7][threadIndexInWarp] = make_float4(mat.column[4][4], mat.column[4][5], mat.column[5][0], mat.column[5][1]);
|
||
|
|
block.columns[8][threadIndexInWarp] = make_float4(mat.column[5][2], mat.column[5][3], mat.column[5][4], mat.column[5][5]);
|
||
|
|
}
|
||
|
|
|
||
|
|
PX_FORCE_INLINE PX_CUDA_CALLABLE void zeroSpatialMatrix(PxgSpatialMatrix6x6Block& block, const PxU32 threadIndexInWarp)
|
||
|
|
{
|
||
|
|
block.columns[0][threadIndexInWarp] = make_float4(0.f, 0.f, 0.f, 0.f);
|
||
|
|
block.columns[1][threadIndexInWarp] = make_float4(0.f, 0.f, 0.f, 0.f);
|
||
|
|
block.columns[2][threadIndexInWarp] = make_float4(0.f, 0.f, 0.f, 0.f);
|
||
|
|
block.columns[3][threadIndexInWarp] = make_float4(0.f, 0.f, 0.f, 0.f);
|
||
|
|
block.columns[4][threadIndexInWarp] = make_float4(0.f, 0.f, 0.f, 0.f);
|
||
|
|
block.columns[5][threadIndexInWarp] = make_float4(0.f, 0.f, 0.f, 0.f);
|
||
|
|
block.columns[6][threadIndexInWarp] = make_float4(0.f, 0.f, 0.f, 0.f);
|
||
|
|
block.columns[7][threadIndexInWarp] = make_float4(0.f, 0.f, 0.f, 0.f);
|
||
|
|
block.columns[8][threadIndexInWarp] = make_float4(0.f, 0.f, 0.f, 0.f);
|
||
|
|
}
|
||
|
|
|
||
|
|
PX_FORCE_INLINE PX_CUDA_CALLABLE Cm::SpatialVectorF loadSpatialVectorF(const PxgSpatialVectorBlock& block, const PxU32 threadIndexInWarp)
|
||
|
|
{
|
||
|
|
float4 top = block.mTopxyz_bx[threadIndexInWarp];
|
||
|
|
float2 bottom = block.mbyz[threadIndexInWarp];
|
||
|
|
|
||
|
|
return Cm::SpatialVectorF(PxVec3(top.x, top.y, top.z), PxVec3(top.w, bottom.x, bottom.y));
|
||
|
|
}
|
||
|
|
|
||
|
|
PX_FORCE_INLINE PX_CUDA_CALLABLE Cm::UnAlignedSpatialVector loadSpatialVector(const PxgSpatialVectorBlock& block, const PxU32 threadIndexInWarp)
|
||
|
|
{
|
||
|
|
float4 top = block.mTopxyz_bx[threadIndexInWarp];
|
||
|
|
float2 bottom = block.mbyz[threadIndexInWarp];
|
||
|
|
|
||
|
|
return Cm::UnAlignedSpatialVector(PxVec3(top.x, top.y, top.z), PxVec3(top.w, bottom.x, bottom.y));
|
||
|
|
}
|
||
|
|
|
||
|
|
PX_FORCE_INLINE PX_CUDA_CALLABLE void storeSpatialVector(PxgSpatialVectorBlock& block, const Cm::UnAlignedSpatialVector& v, const PxU32 threadIndexInWarp)
|
||
|
|
{
|
||
|
|
block.mTopxyz_bx[threadIndexInWarp] = make_float4(v.top.x, v.top.y, v.top.z, v.bottom.x);
|
||
|
|
block.mbyz[threadIndexInWarp] = make_float2(v.bottom.y, v.bottom.z);
|
||
|
|
|
||
|
|
/*Pxstcg(&block.mTopxyz_bx[threadIndexInWarp], make_float4(v.top.x, v.top.y, v.top.z, v.bottom.x));
|
||
|
|
Pxstcg(&block.mbyz[threadIndexInWarp], make_float2(v.bottom.y, v.bottom.z));*/
|
||
|
|
}
|
||
|
|
|
||
|
|
PX_FORCE_INLINE PX_CUDA_CALLABLE void addSpatialVector(PxgSpatialVectorBlock& block, const Cm::UnAlignedSpatialVector& v, const PxU32 threadIndexInWarp)
|
||
|
|
{
|
||
|
|
block.mTopxyz_bx[threadIndexInWarp] += make_float4(v.top.x, v.top.y, v.top.z, v.bottom.x);
|
||
|
|
block.mbyz[threadIndexInWarp] += make_float2(v.bottom.y, v.bottom.z);
|
||
|
|
|
||
|
|
/*Pxstcg(&block.mTopxyz_bx[threadIndexInWarp], make_float4(v.top.x, v.top.y, v.top.z, v.bottom.x));
|
||
|
|
Pxstcg(&block.mbyz[threadIndexInWarp], make_float2(v.bottom.y, v.bottom.z));*/
|
||
|
|
}
|
||
|
|
|
||
|
|
#if PX_CUDA_COMPILER
|
||
|
|
PX_FORCE_INLINE __device__ void atomicAddSpatialVector(PxgSpatialVectorBlock& block, const Cm::UnAlignedSpatialVector& v, const PxU32 threadIndexInWarp)
|
||
|
|
{
|
||
|
|
#if __CUDA_ARCH__ >= 200
|
||
|
|
atomicAdd(&block.mTopxyz_bx[threadIndexInWarp].x, v.top.x);
|
||
|
|
atomicAdd(&block.mTopxyz_bx[threadIndexInWarp].y, v.top.y);
|
||
|
|
atomicAdd(&block.mTopxyz_bx[threadIndexInWarp].z, v.top.z);
|
||
|
|
|
||
|
|
atomicAdd(&block.mTopxyz_bx[threadIndexInWarp].w, v.bottom.x);
|
||
|
|
atomicAdd(&block.mbyz[threadIndexInWarp].x, v.bottom.y);
|
||
|
|
atomicAdd(&block.mbyz[threadIndexInWarp].y, v.bottom.z);
|
||
|
|
#else
|
||
|
|
PX_UNUSED(block);
|
||
|
|
PX_UNUSED(v);
|
||
|
|
PX_UNUSED(threadIndexInWarp);
|
||
|
|
#endif
|
||
|
|
|
||
|
|
/*Pxstcg(&block.mTopxyz_bx[threadIndexInWarp], make_float4(v.top.x, v.top.y, v.top.z, v.bottom.x));
|
||
|
|
Pxstcg(&block.mbyz[threadIndexInWarp], make_float2(v.bottom.y, v.bottom.z));*/
|
||
|
|
}
|
||
|
|
#endif
|
||
|
|
|
||
|
|
PX_FORCE_INLINE PX_CUDA_CALLABLE PxTransform loadSpatialTransform(const PxgSpatialTransformBlock& block, const PxU32 threadIndexInWarp)
|
||
|
|
{
|
||
|
|
const float4 q = block.q[threadIndexInWarp];
|
||
|
|
const float4 p = block.p[threadIndexInWarp];
|
||
|
|
|
||
|
|
return PxTransform(PxVec3(p.x, p.y, p.z), PxQuat(q.x, q.y, q.z, q.w));
|
||
|
|
}
|
||
|
|
|
||
|
|
PX_FORCE_INLINE PX_CUDA_CALLABLE void storeSpatialTransform(PxgSpatialTransformBlock& block, const PxU32 threadIndexInWarp, const PxTransform& transform)
|
||
|
|
{
|
||
|
|
block.q[threadIndexInWarp] = make_float4(transform.q.x, transform.q.y, transform.q.z, transform.q.w);
|
||
|
|
block.p[threadIndexInWarp] = make_float4(transform.p.x, transform.p.y, transform.p.z, 0.f);
|
||
|
|
}
|
||
|
|
|
||
|
|
PX_FORCE_INLINE PX_CUDA_CALLABLE PxQuat loadQuat(const float4* f, const PxU32 threadIndexInWarp)
|
||
|
|
{
|
||
|
|
float4 q = f[threadIndexInWarp];
|
||
|
|
return PxQuat(q.x, q.y, q.z, q.w);
|
||
|
|
}
|
||
|
|
|
||
|
|
PX_FORCE_INLINE PX_CUDA_CALLABLE PxVec3 loadPxVec3(const float4* f, const PxU32 threadIndexInWarp)
|
||
|
|
{
|
||
|
|
float4 v = f[threadIndexInWarp];
|
||
|
|
return PxVec3(v.x, v.y, v.z);
|
||
|
|
}
|
||
|
|
|
||
|
|
PX_FORCE_INLINE PX_CUDA_CALLABLE void loadPxMat33(const PxgMat33Block& block, const PxU32 threadIndexInWarp, PxMat33& mat)
|
||
|
|
{
|
||
|
|
float4 v0 = block.mCol0[threadIndexInWarp];
|
||
|
|
float4 v1 = block.mCol1[threadIndexInWarp];
|
||
|
|
float v2 = block.mCol2[threadIndexInWarp];
|
||
|
|
|
||
|
|
mat.column0.x = v0.x; mat.column0.y = v0.y; mat.column0.z = v0.z;
|
||
|
|
mat.column1.x = v0.w; mat.column1.y = v1.x; mat.column1.z = v1.y;
|
||
|
|
mat.column2.x = v1.z; mat.column2.y = v1.w; mat.column2.z = v2;
|
||
|
|
}
|
||
|
|
|
||
|
|
PX_FORCE_INLINE PX_CUDA_CALLABLE void storePxMat33(PxgMat33Block& block, const PxU32 threadIndexInWarp, const PxMat33& mat)
|
||
|
|
{
|
||
|
|
block.mCol0[threadIndexInWarp] = make_float4(mat.column0.x, mat.column0.y, mat.column0.z, mat.column1.x);
|
||
|
|
block.mCol1[threadIndexInWarp] = make_float4(mat.column1.y, mat.column1.z, mat.column2.x, mat.column2.y);
|
||
|
|
block.mCol2[threadIndexInWarp] = mat.column2.z;
|
||
|
|
}
|
||
|
|
|
||
|
|
PX_FORCE_INLINE PX_CUDA_CALLABLE Cm::UnAlignedSpatialVector loadSpatialVector(const Cm::UnAlignedSpatialVector* PX_RESTRICT vector)
|
||
|
|
{
|
||
|
|
size_t ptr = size_t(vector);
|
||
|
|
if (ptr & 0xf)
|
||
|
|
{
|
||
|
|
float2* top = reinterpret_cast<float2*>(ptr);
|
||
|
|
float2 val = *top;
|
||
|
|
float4 val2 = *(reinterpret_cast<float4*>(top+1));
|
||
|
|
|
||
|
|
return Cm::UnAlignedSpatialVector(PxVec3(val.x, val.y, val2.x), PxVec3(val2.y, val2.z, val2.w));
|
||
|
|
}
|
||
|
|
else
|
||
|
|
{
|
||
|
|
float4* top = reinterpret_cast<float4*>(ptr);
|
||
|
|
float4 val = *top;
|
||
|
|
float2 val2 = *(reinterpret_cast<float2*>(top + 1));
|
||
|
|
|
||
|
|
return Cm::UnAlignedSpatialVector(PxVec3(val.x, val.y, val.z), PxVec3(val.w, val2.x, val2.y));
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
PX_FORCE_INLINE PX_CUDA_CALLABLE void storeSpatialVector(Cm::UnAlignedSpatialVector* PX_RESTRICT vector,
|
||
|
|
const Cm::UnAlignedSpatialVector& src)
|
||
|
|
{
|
||
|
|
size_t ptr = size_t(vector);
|
||
|
|
if (ptr & 0xf)
|
||
|
|
{
|
||
|
|
float2* top = reinterpret_cast<float2*>(ptr);
|
||
|
|
float4* bottom = (reinterpret_cast<float4*>(top + 1));
|
||
|
|
|
||
|
|
*top = make_float2(src.top.x, src.top.y);
|
||
|
|
*bottom = make_float4(src.top.z, src.bottom.x, src.bottom.y, src.bottom.z);
|
||
|
|
}
|
||
|
|
else
|
||
|
|
{
|
||
|
|
float4* top = reinterpret_cast<float4*>(ptr);
|
||
|
|
float2* bottom = (reinterpret_cast<float2*>(top + 1));
|
||
|
|
|
||
|
|
*top = make_float4(src.top.x, src.top.y, src.top.z, src.bottom.x);
|
||
|
|
*bottom = make_float2(src.bottom.y, src.bottom.z);
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
|
||
|
|
struct PxgArticulationBitFieldStackData
|
||
|
|
{
|
||
|
|
PxU64 bitField[32];
|
||
|
|
};
|
||
|
|
|
||
|
|
typedef PxgArticulationBitFieldStackData PxgArticulationBitFieldData;
|
||
|
|
|
||
|
|
struct PxgArticulationTraversalStackData
|
||
|
|
{
|
||
|
|
PxU32 indices[32];
|
||
|
|
PxgSpatialVectorBlock impulseStack;
|
||
|
|
PxgSpatialVectorBlock deltaVStack;
|
||
|
|
};
|
||
|
|
|
||
|
|
struct PxgArticulationBlockLinkData
|
||
|
|
{
|
||
|
|
PxgSpatialMatrixBlock mSpatialArticulatedInertia;
|
||
|
|
PxgSpatialMatrix6x6Block mSpatialResponseMatrix;
|
||
|
|
PxgMat33Block mIsolatedInertia;
|
||
|
|
PxReal mMass[32];
|
||
|
|
|
||
|
|
//KS - these are questionable - we might want to store velocity/deltaVelocity etc. in a non-SOA format
|
||
|
|
//for the constraint solver. To be revisited later!!!!
|
||
|
|
PxgSpatialVectorBlock mMotionVelocity;
|
||
|
|
PxgSpatialVectorBlock mPosMotionVelocity;
|
||
|
|
PxgSpatialVectorBlock mDeltaMotion;
|
||
|
|
PxgSpatialVectorBlock mScratchImpulse; // These are non-propagated impulses. They may persist between kernels and are properly processed and then reset in averageLinkImpulsesAndPropagate. Should be reset to zero after propagation.
|
||
|
|
PxgSpatialVectorBlock mScratchDeltaV; // Temp! Used only for propagating velocities around the tree structure within a kernel! Left in an undefined state after use.
|
||
|
|
|
||
|
|
PxgSpatialVectorBlock mSolverSpatialDeltaVel;
|
||
|
|
PxgSpatialVectorBlock mSolverSpatialImpulse;
|
||
|
|
PxgSpatialVectorBlock mSolverSpatialInternalConstraintImpulse;
|
||
|
|
|
||
|
|
PxgSpatialVectorBlock mZAVector;
|
||
|
|
PxgSpatialVectorBlock mZAIntVector;
|
||
|
|
PxgSpatialVectorBlock mCoriolis;
|
||
|
|
PxgSpatialVectorBlock mConstraintForces;
|
||
|
|
|
||
|
|
PxgSpatialVectorBlock mMotionAcceleration;
|
||
|
|
PxgSpatialVectorBlock mMotionAccelerationInternal;
|
||
|
|
PxgSpatialVectorBlock mBiasForce;
|
||
|
|
|
||
|
|
PxReal mDeltaScale[32];
|
||
|
|
|
||
|
|
float mRw_x[32];
|
||
|
|
float mRw_y[32];
|
||
|
|
float mRw_z[32];
|
||
|
|
float4 mRelativeQuat[32];
|
||
|
|
|
||
|
|
PxU32 mNbStaticContacts[32];
|
||
|
|
PxU32 mStaticContactStartIndex[32];
|
||
|
|
|
||
|
|
PxU32 mNbStaticJoints[32];
|
||
|
|
PxU32 mStaticJointStartIndex[32];
|
||
|
|
|
||
|
|
PxgSpatialTransformBlock mPreTransform;
|
||
|
|
PxgSpatialTransformBlock mAccumulatedPose;
|
||
|
|
PxgSpatialTransformBlock mChildPose;
|
||
|
|
PxgSpatialTransformBlock mParentPose;
|
||
|
|
float4 mDeltaQ[32];
|
||
|
|
float4 mLinDampingX_AngDampingY_maxLinVelSqZ_maxAngVelSqW[32];
|
||
|
|
float4 mInvInertiaXYZ_invMassW[32];
|
||
|
|
PxReal mCfm[32];
|
||
|
|
PxU32 mChildrenOffset[32];
|
||
|
|
PxU32 mNumChildren[32];
|
||
|
|
|
||
|
|
PxU32 mJointOffset[32];
|
||
|
|
PxU32 mParents[32];
|
||
|
|
PxU8 mDofs[32];
|
||
|
|
PxU8 mJointType[32];
|
||
|
|
PxU8 mInvDofIds[6][32]; //mapping from axis to joint offset
|
||
|
|
PxU8 mDisableGravity[32];
|
||
|
|
PxU8 mRetainsAcceleration[32];
|
||
|
|
|
||
|
|
// PT: padded to 128 for coalesced loads
|
||
|
|
PxU32 pad[16];
|
||
|
|
};
|
||
|
|
PX_COMPILE_TIME_ASSERT((sizeof(PxgArticulationBlockLinkData) & 127)==0);
|
||
|
|
|
||
|
|
struct PxgArticulationBlockSpatialTendonData
|
||
|
|
{
|
||
|
|
public:
|
||
|
|
PxU32 mNumAttachments[32];
|
||
|
|
PxU32 mNumConstraints[32];
|
||
|
|
PxReal mStiffness[32];
|
||
|
|
PxReal mDamping[32];
|
||
|
|
PxReal mLimitStiffness[32];
|
||
|
|
PxReal mOffset[32];
|
||
|
|
};
|
||
|
|
|
||
|
|
struct PxgArticulationBlockFixedTendonData
|
||
|
|
{
|
||
|
|
public:
|
||
|
|
PxU32 mNumTendonJoints[32];
|
||
|
|
PxU32 mNumConstraints[32];
|
||
|
|
PxReal mStiffness[32];
|
||
|
|
PxReal mDamping[32];
|
||
|
|
PxReal mLimitStiffness[32];
|
||
|
|
PxReal mOffset[32];
|
||
|
|
PxReal mLowLimit[32];
|
||
|
|
PxReal mHighLimit[32];
|
||
|
|
PxReal mRestLength[32];
|
||
|
|
};
|
||
|
|
|
||
|
|
struct PxgArtiStateDirtyFlag
|
||
|
|
{
|
||
|
|
enum
|
||
|
|
{
|
||
|
|
eVEL_DIRTY = 1 << 0,
|
||
|
|
eHAS_IMPULSES = 1 << 1
|
||
|
|
};
|
||
|
|
};
|
||
|
|
|
||
|
|
//This class stores a block of data corresponding to 32 articulations.
|
||
|
|
//It is filled in at runtime.
|
||
|
|
struct PxgArticulationBlockData
|
||
|
|
{
|
||
|
|
PxgSpatialMatrixBlock mInvSpatialArticulatedInertia;
|
||
|
|
PxU32 mFlags[32]; // PT: seems to be mainly for PxArticulationFlag::eFIX_BASE so only 1 bit is needed? Merge with mNumLinks?
|
||
|
|
PxU32 mNumLinks[32];
|
||
|
|
PxU32 mNumSpatialTendons[32];
|
||
|
|
PxU32 mNumFixedTendons[32];
|
||
|
|
PxU32 mNumMimicJoints[32];
|
||
|
|
PxU32 mTotalDofs[32];
|
||
|
|
PxU32 mArticulationIndex[32];
|
||
|
|
PxReal mSleepThreshold[32];
|
||
|
|
|
||
|
|
//This one is a bit of a hack - it's the common link velocity.
|
||
|
|
PxgSpatialVectorBlock mCommonLinkDeltaVelocity;
|
||
|
|
PxU32 mLinkWithDeferredImpulse[32];
|
||
|
|
Cm::UnAlignedSpatialVector* mMotionVelocitiesPtr[32];
|
||
|
|
PxgSpatialVectorBlock mRootDeferredZ;
|
||
|
|
|
||
|
|
float4 mCOM_TotalInvMassW[32];
|
||
|
|
|
||
|
|
PxU8 mStateDirty[32]; //32 bytes
|
||
|
|
PxU32 mTotalSelfConstraintCount; //4 bytes
|
||
|
|
PxU32 mSelfConstraintOffset; //4 bytes
|
||
|
|
// PT: padded to 128 for coalesced loads
|
||
|
|
PxU32 pad[22]; //88 bytes padding, bringing this all up to 128 bytes
|
||
|
|
};
|
||
|
|
PX_COMPILE_TIME_ASSERT((sizeof(PxgArticulationBlockData) & 127)==0);
|
||
|
|
|
||
|
|
struct PxgArticulationInternalConstraintData
|
||
|
|
{
|
||
|
|
PxgSpatialVectorBlock mRow0;
|
||
|
|
PxgSpatialVectorBlock mRow1;
|
||
|
|
PxgSpatialVectorBlock mDeltaVA;
|
||
|
|
PxgSpatialVectorBlock mDeltaVB;
|
||
|
|
|
||
|
|
PxReal mRecipResponse[32];
|
||
|
|
PxReal mResponse[32];
|
||
|
|
|
||
|
|
float2 mLimits_LowLimitX_highLimitY[32];
|
||
|
|
float2 mLimitError_LowX_highY[32];
|
||
|
|
PxReal mHighImpulse[32];
|
||
|
|
PxReal mLowImpulse[32];
|
||
|
|
PxReal mMaxJointVelocity[32];
|
||
|
|
|
||
|
|
// drive
|
||
|
|
PxReal mDriveImpulse[32];
|
||
|
|
PxReal mConstraintMaxImpulse[32];
|
||
|
|
PxReal mMaxForce[32];
|
||
|
|
PxU32 mDriveType[32];
|
||
|
|
PxReal mMaxEffort[32];
|
||
|
|
PxReal mMaxActuatorVelocity[32];
|
||
|
|
PxReal mVelocityDependentResistance[32];
|
||
|
|
PxReal mSpeedEffortGradient[32];
|
||
|
|
|
||
|
|
PxReal mDriveTargetPos[32];
|
||
|
|
PxReal mArmature[32];
|
||
|
|
|
||
|
|
float4 mTargetVelPlusInitialBiasX_DriveBiasCoefficientY_VelMultiplierZ_ImpulseMultiplierW[32];
|
||
|
|
|
||
|
|
float mDriveStiffness[32];
|
||
|
|
float mDamping[32];
|
||
|
|
float mDriveTargetVel[32];
|
||
|
|
float mTargetPosBias[32];
|
||
|
|
|
||
|
|
PxReal mAccumulatedFrictionImpulse[32];
|
||
|
|
|
||
|
|
//old friction
|
||
|
|
PxReal mMaxFrictionForce[32];
|
||
|
|
PxReal mFrictionCoefficient[32];
|
||
|
|
|
||
|
|
//new friction
|
||
|
|
PxReal mStaticFrictionEffort[32];
|
||
|
|
PxReal mDynamicFrictionEffort[32];
|
||
|
|
PxReal mViscousFrictionCoefficient[32];
|
||
|
|
|
||
|
|
PX_FORCE_INLINE void PX_CUDA_CALLABLE setImplicitDriveDesc
|
||
|
|
(const PxU32 threadIndexInWarp,const Dy::ArticulationImplicitDriveDesc& driveDesc)
|
||
|
|
{
|
||
|
|
mTargetVelPlusInitialBiasX_DriveBiasCoefficientY_VelMultiplierZ_ImpulseMultiplierW[threadIndexInWarp] =
|
||
|
|
make_float4(driveDesc.driveTargetVelPlusInitialBias, driveDesc.driveBiasCoefficient, driveDesc.driveVelMultiplier, driveDesc.driveImpulseMultiplier);
|
||
|
|
mTargetPosBias[threadIndexInWarp] = driveDesc.driveTargetPosBias;
|
||
|
|
}
|
||
|
|
PX_FORCE_INLINE PX_CUDA_CALLABLE Dy::ArticulationImplicitDriveDesc getImplicitDriveDesc(const PxU32 threadIndexInWarp) const
|
||
|
|
{
|
||
|
|
const Dy::ArticulationImplicitDriveDesc driveDesc
|
||
|
|
(
|
||
|
|
mTargetVelPlusInitialBiasX_DriveBiasCoefficientY_VelMultiplierZ_ImpulseMultiplierW[threadIndexInWarp].x,
|
||
|
|
mTargetVelPlusInitialBiasX_DriveBiasCoefficientY_VelMultiplierZ_ImpulseMultiplierW[threadIndexInWarp].y,
|
||
|
|
mTargetVelPlusInitialBiasX_DriveBiasCoefficientY_VelMultiplierZ_ImpulseMultiplierW[threadIndexInWarp].z,
|
||
|
|
mTargetVelPlusInitialBiasX_DriveBiasCoefficientY_VelMultiplierZ_ImpulseMultiplierW[threadIndexInWarp].w,
|
||
|
|
mTargetPosBias[threadIndexInWarp]
|
||
|
|
);
|
||
|
|
return driveDesc;
|
||
|
|
}
|
||
|
|
};
|
||
|
|
PX_COMPILE_TIME_ASSERT((sizeof(PxgArticulationInternalConstraintData) & 127)==0);
|
||
|
|
|
||
|
|
struct PxgArticulationInternalTendonConstraintData
|
||
|
|
{
|
||
|
|
PxgSpatialVectorBlock mRow0;
|
||
|
|
PxgSpatialVectorBlock mRow1;
|
||
|
|
|
||
|
|
PxgSpatialVectorBlock mDeltaVB;
|
||
|
|
|
||
|
|
PxReal mRecipResponse[32];
|
||
|
|
PxReal mAccumulatedLength[32];
|
||
|
|
PxReal mBiasCoefficient[32];
|
||
|
|
PxReal mVelMultiplier[32];
|
||
|
|
PxReal mImpulseMultiplier[32];
|
||
|
|
PxReal mAppliedForce[32];
|
||
|
|
|
||
|
|
PxReal mLimitBiasCoefficient[32];
|
||
|
|
PxReal mLimitImpulseMultiplier[32];
|
||
|
|
PxReal mLimitAppliedForce[32];
|
||
|
|
|
||
|
|
PxU32 mLink0[32];
|
||
|
|
PxU32 mLink1[32];
|
||
|
|
PxReal mDeltaVA[32];
|
||
|
|
|
||
|
|
PxReal mRestDistance[32];
|
||
|
|
PxReal mLowLimit[32];
|
||
|
|
PxReal mHighLimit[32];
|
||
|
|
};
|
||
|
|
|
||
|
|
struct PxgArticulationBlockDofData
|
||
|
|
{
|
||
|
|
PxReal mJointPositions[32];
|
||
|
|
PxReal mJointVelocities[32];
|
||
|
|
PxReal mJointUnconstrainedVelocities[32];
|
||
|
|
PxReal mPosJointVelocities[32];
|
||
|
|
PxReal mJointAccel[32];
|
||
|
|
PxReal mQstZ[32];
|
||
|
|
PxReal mQstZIcInternal[32];
|
||
|
|
PxReal mDeferredQstZ[32];
|
||
|
|
PxReal mTmpQstZ[32];
|
||
|
|
|
||
|
|
PxgSpatialVectorBlock mWorldMotionMatrix;
|
||
|
|
PxgSpatialVectorBlock mLocalMotionMatrix;
|
||
|
|
PxgSpatialVectorBlock mIsInvDW;
|
||
|
|
PxgSpatialVectorBlock mIsW;
|
||
|
|
PxgSpatialVectorBlock mJointAxis;
|
||
|
|
float mInvStIsT_x[32];
|
||
|
|
float mInvStIsT_y[32];
|
||
|
|
float mInvStIsT_z[32];
|
||
|
|
PxU8 mDofIds[32]; //mapping from joint offset to axis
|
||
|
|
PxU8 mMotion[32];
|
||
|
|
|
||
|
|
PxgArticulationInternalConstraintData mConstraintData;
|
||
|
|
|
||
|
|
// PT: padded to 128 for coalesced loads
|
||
|
|
PxU32 pad[16];
|
||
|
|
};
|
||
|
|
PX_COMPILE_TIME_ASSERT((sizeof(PxgArticulationBlockDofData) & 127)==0);
|
||
|
|
|
||
|
|
struct PxgArticulationBlockAttachmentData
|
||
|
|
{
|
||
|
|
public:
|
||
|
|
PxVec3 mRelativeOffset[32];
|
||
|
|
PxReal mRestDistance[32];
|
||
|
|
PxReal mCoefficient[32];
|
||
|
|
|
||
|
|
PxU64 mChildrens[32];
|
||
|
|
PxU16 mLinkIndex[32];
|
||
|
|
PxU32 mParents[32];
|
||
|
|
PxReal mLowLimit[32];
|
||
|
|
PxReal mHighLimit[32];
|
||
|
|
};
|
||
|
|
|
||
|
|
struct PxgArticulationBlockTendonJointData
|
||
|
|
{
|
||
|
|
public:
|
||
|
|
PxU32 mAxis[32];
|
||
|
|
PxReal mCoefficient[32];
|
||
|
|
PxReal mRecipCoefficient[32];
|
||
|
|
|
||
|
|
PxU64 mChildrens[32];
|
||
|
|
PxU16 mLinkIndex[32];
|
||
|
|
PxU32 mParents[32];
|
||
|
|
PxU32 mConstraintId[32];
|
||
|
|
};
|
||
|
|
|
||
|
|
struct PxgArticulationBlockInternallMimicJointData
|
||
|
|
{
|
||
|
|
PxU32 mDofA[32];
|
||
|
|
PxU32 mDofB[32];
|
||
|
|
//Cache recip effectiveInertia = [J * M^-1 * J^T] = [rAA + gearRatio*(rAB + rBA) + gearRatio*gearRatio*rBB]
|
||
|
|
//Impulse = [1, gearRatio]^T * [-b + J*v] /[J * M^-1 * J^T + cfm]
|
||
|
|
//Impulse = [1, gearRatio]^T * [-b + J*v] / [recipEffectiveInertia + cfm];
|
||
|
|
PxReal recipEffectiveInertia[32];
|
||
|
|
};
|
||
|
|
|
||
|
|
struct PxgArticulationBlockMimicJointData
|
||
|
|
{
|
||
|
|
PxU32 mLinkA[32];
|
||
|
|
PxU32 mAxisA[32];
|
||
|
|
PxU32 mLinkB[32];
|
||
|
|
PxU32 mAxisB[32];
|
||
|
|
PxReal mGearRatio[32];
|
||
|
|
PxReal mOffset[32];
|
||
|
|
PxReal mNaturalFrequency[32];
|
||
|
|
PxReal mDampingRatio[32];
|
||
|
|
PxgArticulationBlockInternallMimicJointData mInternalData;
|
||
|
|
};
|
||
|
|
|
||
|
|
struct PxgArticulationData
|
||
|
|
{
|
||
|
|
// index in the articulation buffer. This is the same as articulationRemapId in the PxgBodySim
|
||
|
|
PxU32 index;
|
||
|
|
PxU32 bodySimIndex;
|
||
|
|
PxU32 gpuDirtyFlag;
|
||
|
|
PxU32 updateDirty;
|
||
|
|
|
||
|
|
PxU16 numLinks;
|
||
|
|
PxU16 numJointDofs;
|
||
|
|
PxU16 numSpatialTendons;
|
||
|
|
|
||
|
|
PxU16 numFixedTendons;
|
||
|
|
PxU16 numMimicJoints;
|
||
|
|
PxU8 flags;
|
||
|
|
bool confiDirty;
|
||
|
|
|
||
|
|
PxU32 numPathToRoots;
|
||
|
|
};
|
||
|
|
|
||
|
|
#if PX_VC
|
||
|
|
#pragma warning(push)
|
||
|
|
#pragma warning(disable:4324)
|
||
|
|
#endif
|
||
|
|
PX_ALIGN_PREFIX(16)
|
||
|
|
class PxgArticulation
|
||
|
|
{
|
||
|
|
public:
|
||
|
|
|
||
|
|
PxgArticulationData data;
|
||
|
|
|
||
|
|
PxgArticulationLink* links;
|
||
|
|
Dy::ArticulationJointCore* joints;
|
||
|
|
Dy::ArticulationJointCoreData* jointData;
|
||
|
|
|
||
|
|
Cm::UnAlignedSpatialVector* motionVelocities;
|
||
|
|
Cm::UnAlignedSpatialVector* motionAccelerations;
|
||
|
|
Cm::UnAlignedSpatialVector* linkIncomingJointForces;
|
||
|
|
Cm::UnAlignedSpatialVector* corioliseVectors;
|
||
|
|
Cm::UnAlignedSpatialVector* zAForces; // used as temporary propagation buffer in inverseDynamics and to store TGS per substep isolated forces while the solver runs. Not cleared after use.
|
||
|
|
|
||
|
|
Cm::UnAlignedSpatialVector* externalAccelerations;
|
||
|
|
|
||
|
|
Cm::UnAlignedSpatialVector* rootPreMotionVelocity;
|
||
|
|
|
||
|
|
PxReal* jointPositions;
|
||
|
|
PxReal* jointVelocities;
|
||
|
|
PxReal* jointAccelerations;
|
||
|
|
PxReal* jointForce;
|
||
|
|
PxReal* jointTargetPositions;
|
||
|
|
PxReal* jointTargetVelocities;
|
||
|
|
|
||
|
|
PxU32* jointOffsets;
|
||
|
|
|
||
|
|
PxSpatialMatrix* worldSpatialArticulatedInertia;
|
||
|
|
PxSpatialMatrix* spatialResponseMatrixW;
|
||
|
|
PxTransform* linkBody2Worlds;
|
||
|
|
PxU8* linkJointRootStateDataBuffer;
|
||
|
|
PxTransform* linkBody2Actors;
|
||
|
|
PxU32* parents;
|
||
|
|
//Local space motion matrix - constant unless properties are changed
|
||
|
|
Dy::SpatialSubspaceMatrix* motionMatrix;
|
||
|
|
//World space motion matrix - computed from local matrix each frame
|
||
|
|
Dy::SpatialSubspaceMatrix* worldMotionMatrix; // AD: only inverse dynamics now.
|
||
|
|
|
||
|
|
Cm::UnAlignedSpatialVector* jointAxis;
|
||
|
|
|
||
|
|
PxReal* linkWakeCounters;
|
||
|
|
PxgArticulationLinkSleepData* linkSleepData;
|
||
|
|
PxgArticulationLinkProp* linkProps;
|
||
|
|
ArticulationBitField* children;
|
||
|
|
PxU32* pathToRoot;
|
||
|
|
|
||
|
|
PxQuat* relativeQuat;
|
||
|
|
|
||
|
|
PxQuat* tempParentToChilds;
|
||
|
|
PxVec3* tempRs;
|
||
|
|
|
||
|
|
PxGpuSpatialTendonData* spatialTendonParams;
|
||
|
|
PxgArticulationTendon* spatialTendons;
|
||
|
|
|
||
|
|
PxGpuFixedTendonData* fixedTendonParams;
|
||
|
|
PxgArticulationTendon* fixedTendons;
|
||
|
|
|
||
|
|
PxReal* cfms;
|
||
|
|
PxReal* cfmScale;
|
||
|
|
|
||
|
|
Dy::ArticulationMimicJointCore* mimicJointCores;
|
||
|
|
|
||
|
|
PX_ALIGN(16, PxSpatialMatrix) invSpatialArticulatedInertiaW;
|
||
|
|
|
||
|
|
Dy::ErrorAccumulator internalResidualAccumulator; //Internal residual means no errors introduces by contacts or non-articulation joints connected to this instance will be included
|
||
|
|
Dy::ErrorAccumulator contactResidualAccumulator;
|
||
|
|
}
|
||
|
|
PX_ALIGN_SUFFIX(16);
|
||
|
|
#if PX_VC
|
||
|
|
#pragma warning(pop)
|
||
|
|
#endif
|
||
|
|
|
||
|
|
/*
|
||
|
|
\brief We aggregate link, joint, and root link state data into a single char buffer.
|
||
|
|
We do this in two ways:
|
||
|
|
a) a single buffer for each articulation
|
||
|
|
b) a single buffer for all articulations
|
||
|
|
The typical pattern of data flow is as follows:
|
||
|
|
a) we store state data in a unique device buffer for each articulation.
|
||
|
|
b) we copy from the individual device buffers per articulation to the single device buffer for all articulations.
|
||
|
|
c) we copy the single buffer for all articulations from device to host
|
||
|
|
d) we distribute state data from the single host buffer on the host to the individual articulation instances on the host.
|
||
|
|
The state data that we store is as follows:
|
||
|
|
a) link body2Worlds array, link velocities array, link accelerations array, link incoming joint forces array
|
||
|
|
b) joint positions array, joint velocities array, joint accelerations array
|
||
|
|
d) root link pre-sim velocity
|
||
|
|
The struct PxgLinkJointRootStateData contains helper functions for allocating and querying
|
||
|
|
state data buffers.
|
||
|
|
*/
|
||
|
|
struct PxgArticulationLinkJointRootStateData
|
||
|
|
{/**
|
||
|
|
\brief Compute the number of bytes required for an articulation with known
|
||
|
|
maximum link count and known maximum dof count.
|
||
|
|
\param[in] maxNbLinks is the maximum number of links of any articulation in the ensemble of articulations.
|
||
|
|
\param[in] maxNbDofs is the maximum number of dofs of any articulation in the ensemble of articulations.
|
||
|
|
\note This does not return an aligned size, use computeStateDataBufferByteSizeAligned16 for that purpose
|
||
|
|
\return The number of bytes required to store the state data for an articulation.
|
||
|
|
*/
|
||
|
|
static PX_CUDA_CALLABLE PX_FORCE_INLINE PxU32 computeSingleArticulationStateDataBufferByteSizeRaw
|
||
|
|
(const PxU32 maxNbLinks, const PxU32 maxNbDofs)
|
||
|
|
{
|
||
|
|
PxU32 byteSizePerArt =
|
||
|
|
(sizeof(PxTransform) + 3 * sizeof(Cm::UnAlignedSpatialVector)) * maxNbLinks; //link pose + link velocity + link acceleration + link incoming joint force
|
||
|
|
byteSizePerArt += sizeof(PxReal) * maxNbDofs; //joint pos
|
||
|
|
byteSizePerArt += sizeof(PxReal) * maxNbDofs; //joint vel
|
||
|
|
byteSizePerArt += sizeof(PxReal) * maxNbDofs; //joint accel
|
||
|
|
byteSizePerArt += sizeof(Cm::UnAlignedSpatialVector); //root pre-sim vel
|
||
|
|
return byteSizePerArt;
|
||
|
|
}
|
||
|
|
|
||
|
|
/**
|
||
|
|
\brief Compute the number of bytes required for an ensemble of articulations with known
|
||
|
|
maximum link count and known maximum dof count.
|
||
|
|
\param[in] maxNbLinks is the maximum number of links of any articulation in the ensemble of articulations.
|
||
|
|
\param[in] maxNbDofs is the maximum number of dofs of any articulation in the ensemble of articulations.
|
||
|
|
\param[in] nbArticulations is the number of articulations in the ensemble.
|
||
|
|
\note This may be used to compute the number of bytes required for a single articulation by setting
|
||
|
|
nbArticulations to 1 and setting maxNbLinks etc to be the link and dof count of that articulation.
|
||
|
|
\return The number of bytes required to store the state data for an ensemble of articulations.
|
||
|
|
*/
|
||
|
|
static PX_CUDA_CALLABLE PX_FORCE_INLINE PxU32 computeStateDataBufferByteSizeAligned16
|
||
|
|
(const PxU32 maxNbLinks, const PxU32 maxNbDofs, const PxU32 nbArticulations)
|
||
|
|
{
|
||
|
|
const PxU32 byteSizePerArt = computeSingleArticulationStateDataBufferByteSizeRaw(maxNbLinks, maxNbDofs);
|
||
|
|
const PxU32 byteSize16PerArt = ((byteSizePerArt + 15) & ~15); //align the size upwards to the next 16-byte boundary
|
||
|
|
return (byteSize16PerArt * nbArticulations);
|
||
|
|
}
|
||
|
|
|
||
|
|
/**
|
||
|
|
\brief Return the pointer to a single articulation's state data buffer.
|
||
|
|
\param[in] inputBufferForAllArticulations is a pointer to the memory containing the state
|
||
|
|
data for the entire ensemble of articulations.
|
||
|
|
\param[in] maxNbLinks is the maximum number of links of any articulation in the ensemble of articulations.
|
||
|
|
\param[in] maxNbDofs is the maximum number of dofs of any articulation in the ensemble of articulations.
|
||
|
|
\param[in] articulationId is the index of a single articulation within the ensemble.
|
||
|
|
\return The pointer to a single articulation's state data buffer.
|
||
|
|
*/
|
||
|
|
static PX_CUDA_CALLABLE PX_FORCE_INLINE PxU8* getArticulationStateDataBuffer
|
||
|
|
(PxU8* inputBufferForAllArticulations,
|
||
|
|
const PxU32 maxNbLinks, const PxU32 maxNbDofs,
|
||
|
|
const PxU32 articulationId)
|
||
|
|
{
|
||
|
|
PxU8* singleArticulationStateBuffer =
|
||
|
|
inputBufferForAllArticulations +
|
||
|
|
computeStateDataBufferByteSizeAligned16(maxNbLinks, maxNbDofs, articulationId);
|
||
|
|
|
||
|
|
return singleArticulationStateBuffer;
|
||
|
|
}
|
||
|
|
|
||
|
|
/**
|
||
|
|
\brief Decompose the state data buffer of a single articulation into pointers to arrays of
|
||
|
|
poses, velocities etc.
|
||
|
|
\param[in] singleArticulationStateBuffer is the state data buffer for a single articulation.
|
||
|
|
\param[in] nbLinks is the number of links for the single articulation.
|
||
|
|
\param[in] nbDofs is the number of dofs for the single articulation.
|
||
|
|
\param[out] linkBody2Worlds is an array of link poses with one element per link.
|
||
|
|
\param[out] linkVels is a pointer to an array of link spatial velocities with one element per link.
|
||
|
|
\param[out] linkIncomingJointForces is a pointer to an array of link incoming joint forces with one element per link.
|
||
|
|
\param[out] jointPositions is a pointer to an array of joint positions with one element per dof.
|
||
|
|
\param[out] jointVelocities is a pointer to an array of joint velocities with one element per link.
|
||
|
|
\param[out] jointAccelerations is a pointer to an array of joint accelerations with one element per dof.
|
||
|
|
\param[out] rootPreVel is a pointer to the pre-sim velocity of the single articulation's root link.
|
||
|
|
*/
|
||
|
|
static PX_CUDA_CALLABLE PX_FORCE_INLINE void decomposeArticulationStateDataBuffer
|
||
|
|
(PxU8* singleArticulationStateBuffer,
|
||
|
|
const PxU32 nbLinks, const PxU32 nbDofs,
|
||
|
|
PxTransform*& linkBody2Worlds, Cm::UnAlignedSpatialVector*& linkVels, Cm::UnAlignedSpatialVector*& linkAccels, Cm::UnAlignedSpatialVector*& linkIncomingJointForces,
|
||
|
|
PxReal*& jointPositions, PxReal*& jointVelocities, PxReal*& jointAccelerations,
|
||
|
|
Cm::UnAlignedSpatialVector*& rootPreVel)
|
||
|
|
{
|
||
|
|
PxU8* buffer = singleArticulationStateBuffer;
|
||
|
|
linkBody2Worlds = reinterpret_cast<PxTransform*>(buffer);
|
||
|
|
buffer += sizeof(PxTransform) * nbLinks;
|
||
|
|
linkVels = reinterpret_cast<Cm::UnAlignedSpatialVector*>(buffer);
|
||
|
|
buffer += sizeof(Cm::UnAlignedSpatialVector) * nbLinks;
|
||
|
|
linkAccels = reinterpret_cast<Cm::UnAlignedSpatialVector*>(buffer);
|
||
|
|
buffer += sizeof(Cm::UnAlignedSpatialVector) * nbLinks;
|
||
|
|
linkIncomingJointForces = reinterpret_cast<Cm::UnAlignedSpatialVector*>(buffer);
|
||
|
|
buffer += sizeof(Cm::UnAlignedSpatialVector) * nbLinks;
|
||
|
|
jointPositions = reinterpret_cast<PxReal*>(buffer);
|
||
|
|
buffer += sizeof(PxReal) * nbDofs;
|
||
|
|
jointVelocities = reinterpret_cast<PxReal*>(buffer);
|
||
|
|
buffer += sizeof(PxReal) * nbDofs;
|
||
|
|
jointAccelerations = reinterpret_cast<PxReal*>(buffer);
|
||
|
|
buffer += sizeof(PxReal) * nbDofs;
|
||
|
|
rootPreVel = reinterpret_cast<Cm::UnAlignedSpatialVector*>(buffer);
|
||
|
|
PX_ASSERT(
|
||
|
|
singleArticulationStateBuffer + computeStateDataBufferByteSizeAligned16(nbLinks, nbDofs, 1) ==
|
||
|
|
reinterpret_cast<PxU8*>(((reinterpret_cast<size_t>(buffer) + sizeof(Cm::UnAlignedSpatialVector) + 15) & ~15)));
|
||
|
|
}
|
||
|
|
|
||
|
|
/**
|
||
|
|
\brief Compute the pointer to the array of link poses for a single articulation.
|
||
|
|
\param[in] singleArticulationStateBuffer is the state data buffer for a single articulation.
|
||
|
|
\param[in] nbLinks is the number of links for the single articulation.
|
||
|
|
\param[in] nbDofs is the number of dofs for the single articulation.
|
||
|
|
\return The pointer to the array of link poses for a single articulation.
|
||
|
|
*/
|
||
|
|
//Get the body2World array from the state data buffer of a single articulation.
|
||
|
|
static PX_CUDA_CALLABLE PX_FORCE_INLINE PxTransform* getArticulationBody2Worlds
|
||
|
|
(PxU8* singleArticulationStateBuffer,
|
||
|
|
const PxU32 nbLinks, const PxU32 nbDofs)
|
||
|
|
{
|
||
|
|
PxTransform* linkBody2Worlds;
|
||
|
|
Cm::UnAlignedSpatialVector* linkVels;
|
||
|
|
Cm::UnAlignedSpatialVector* linkAccels;
|
||
|
|
Cm::UnAlignedSpatialVector* linkIncomingJointForces;
|
||
|
|
PxReal* jointPositions;
|
||
|
|
PxReal* jointVelocities;
|
||
|
|
PxReal* jointAccelerations;
|
||
|
|
Cm::UnAlignedSpatialVector* rootPreVel;
|
||
|
|
|
||
|
|
decomposeArticulationStateDataBuffer(
|
||
|
|
singleArticulationStateBuffer,
|
||
|
|
nbLinks, nbDofs,
|
||
|
|
linkBody2Worlds, linkVels, linkAccels, linkIncomingJointForces,
|
||
|
|
jointPositions, jointVelocities, jointAccelerations,
|
||
|
|
rootPreVel);
|
||
|
|
|
||
|
|
return linkBody2Worlds;
|
||
|
|
}
|
||
|
|
};
|
||
|
|
|
||
|
|
|
||
|
|
class PxgArticulationBuffer : public PxUserAllocated
|
||
|
|
{
|
||
|
|
public:
|
||
|
|
|
||
|
|
PxgArticulationBuffer(PxgHeapMemoryAllocatorManager* heapMemoryManager);
|
||
|
|
|
||
|
|
~PxgArticulationBuffer();
|
||
|
|
|
||
|
|
PxgTypedCudaBuffer<PxgArticulationLink> links;
|
||
|
|
PxgTypedCudaBuffer<PxReal> linkWakeCounters; //original set to the same as articulation wakeCounter
|
||
|
|
PxgTypedCudaBuffer<PxgArticulationLinkSleepData> linkSleepData;
|
||
|
|
PxgTypedCudaBuffer<PxgArticulationLinkProp> linkProps;
|
||
|
|
PxgTypedCudaBuffer<Dy::ArticulationJointCore> joints;
|
||
|
|
PxgTypedCudaBuffer<Dy::ArticulationJointCoreData> jointData;
|
||
|
|
PxgTypedCudaBuffer<Cm::UnAlignedSpatialVector> corioliseVectors; //link coriolise vector
|
||
|
|
PxgTypedCudaBuffer<Cm::UnAlignedSpatialVector> zAForces; //link spatial zero acceleration force/ spatical articulate
|
||
|
|
PxgTypedCudaBuffer<PxU32> pathToRoots; //global array store path to root for each link in continuous. Each link should have a start index and numberOfElems
|
||
|
|
|
||
|
|
PxgTypedCudaBuffer<PxGpuSpatialTendonData> spatialTendonParams;
|
||
|
|
PxgTypedCudaBuffer<PxgArticulationTendon> spatialTendons;
|
||
|
|
PxArray<PxgCudaBuffer*> attachmentFixedData;
|
||
|
|
PxArray<PxgCudaBuffer*> attachmentModData;
|
||
|
|
|
||
|
|
PxgTypedCudaBuffer<PxGpuFixedTendonData> fixedTendonParams;
|
||
|
|
PxgTypedCudaBuffer<PxgArticulationTendon> fixedTendons;
|
||
|
|
PxArray<PxgCudaBuffer*> tendonJointFixData;
|
||
|
|
PxArray<PxgCudaBuffer*> tendonJointCoefficientData;
|
||
|
|
|
||
|
|
PxgTypedCudaBuffer<Dy::ArticulationMimicJointCore> mimicJoints;
|
||
|
|
|
||
|
|
PxgTypedCudaBuffer<Cm::UnAlignedSpatialVector> externalAccelerations;
|
||
|
|
|
||
|
|
PxgTypedCudaBuffer<PxReal> jointForce;
|
||
|
|
PxgTypedCudaBuffer<PxReal> jointTargetPositions;
|
||
|
|
PxgTypedCudaBuffer<PxReal> jointTargetVelocities;
|
||
|
|
PxgTypedCudaBuffer<PxU32> jointOffsets;
|
||
|
|
PxgTypedCudaBuffer<PxU32> parents;
|
||
|
|
PxgTypedCudaBuffer<Dy::SpatialSubspaceMatrix> motionMatrix;
|
||
|
|
PxgTypedCudaBuffer<Dy::SpatialSubspaceMatrix> motionMatrixW;
|
||
|
|
PxgTypedCudaBuffer<Cm::UnAlignedSpatialVector> jointAxis;
|
||
|
|
|
||
|
|
PxgTypedCudaBuffer<PxSpatialMatrix> spatialArticulatedInertiaW;
|
||
|
|
PxgTypedCudaBuffer<PxSpatialMatrix> spatialImpulseResponseW;
|
||
|
|
|
||
|
|
//see PxgArticulationLinkJointRootStateData
|
||
|
|
PxgCudaBuffer linkAndJointAndRootStates;
|
||
|
|
|
||
|
|
PxgTypedCudaBuffer<PxTransform> linkBody2Actors;
|
||
|
|
|
||
|
|
PxgTypedCudaBuffer<ArticulationBitField> children;
|
||
|
|
|
||
|
|
PxgTypedCudaBuffer<PxQuat> relativeQuats;
|
||
|
|
PxgTypedCudaBuffer<PxReal> cfms;
|
||
|
|
PxgTypedCudaBuffer<PxReal> cfmScale;
|
||
|
|
|
||
|
|
PxgTypedCudaBuffer<PxQuat> tempParentToChilds;
|
||
|
|
PxgTypedCudaBuffer<PxVec3> tempRs;
|
||
|
|
|
||
|
|
PxU32 linkCount;
|
||
|
|
|
||
|
|
PxgHeapMemoryAllocatorManager* mHeapMemoryManager;
|
||
|
|
};
|
||
|
|
|
||
|
|
//Helper function to compute the index of a particular link's deltaV value in the deltaV buffer.
|
||
|
|
//We store this in a particular order to try and minimize cache misses
|
||
|
|
PX_FORCE_INLINE PX_CUDA_CALLABLE PxU32 computeDeltaVIndex(const PxU32 maxArticulations, const PxU32 maxLinks,
|
||
|
|
const PxU32 articulationID, const PxU32 linkID, const PxU32 slabID)
|
||
|
|
{
|
||
|
|
return articulationID + linkID * maxArticulations + slabID*maxArticulations*maxLinks;
|
||
|
|
}
|
||
|
|
|
||
|
|
}
|
||
|
|
|
||
|
|
#endif
|