// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
//  * Redistributions of source code must retain the above copyright
//    notice, this list of conditions and the following disclaimer.
//  * Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimer in the
//    documentation and/or other materials provided with the distribution.
//  * Neither the name of NVIDIA CORPORATION nor the names of its
//    contributors may be used to endorse or promote products derived
//    from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
|
|
|
|
#include "foundation/PxBounds3.h"
|
|
#include "foundation/PxSimpleTypes.h"
|
|
#include "foundation/PxTransform.h"
|
|
|
|
#include "geometry/PxGeometry.h"
|
|
#include "geometry/PxHeightFieldSample.h"
|
|
|
|
#include "PxgContactManager.h"
|
|
#include "PxgNpKernelIndices.h"
|
|
#include "PxgPersistentContactManifold.h"
|
|
#include "PxgSimulationCoreDesc.h"
|
|
|
|
#include "PxsContactManagerState.h"
|
|
#include "PxsTransformCache.h"
|
|
|
|
#include "convexNpCommon.h"
|
|
#include "cudaNpCommon.h"
|
|
|
|
#include "PxgCommonDefines.h"
|
|
#include "dataReadWriteHelper.cuh"
|
|
#include "heightfieldUtil.cuh"
|
|
#include "manifold.cuh"
|
|
#include "midphaseAllocate.cuh"
|
|
|
|
#include <vector_types.h>
|
|
|
|
using namespace physx;
|
|
|
|
// Empty host-side function with C linkage; it performs no work.
// NOTE(review): presumably exists only so host code can reference a symbol from this
// translation unit and keep its kernels linked in - confirm against the caller.
extern "C" __host__ void initNarrowphaseKernels0() {}
|
|
|
|
// Per-warp scratch record used by heightfieldMidphaseCore: lane 0 stores the values,
// the whole warp reads them back after a __syncwarp() when building the ConvexMeshPair.
// Declared with 16-byte alignment.
PX_ALIGN_PREFIX(16)
struct HeigtFieldDataScratch
{
	// Pose of the convex/sphere/capsule shape expressed in heightfield space (scale not applied).
	PxTransform convexToHeightfieldNoScale;

	// materialIndex of the shape referenced by shapeRef0.
	PxU32 convexShape_materialIndex;

	// materialIndex of the heightfield shape referenced by shapeRef1.
	PxU32 heightfieldShape_materialIndex;
}
PX_ALIGN_SUFFIX(16);
|
|
|
|
|
|
// Midphase for one shape-vs-heightfield contact manager, executed cooperatively by one warp.
//
// Thread layout taken from the indexing below: threadIdx.x is the lane inside the warp,
// threadIdx.y selects the warp inside the block, and each warp handles the contact manager
// with index threadIdx.y + blockIdx.x * blockDim.y. All warp intrinsics use FULL_MASK.
// NOTE(review): this assumes blockDim.x == WARP_SIZE - confirm against the launch code.
//
// Steps:
//  1. Read the work item, both cached transforms and the first shape.
//  2. Pick manifold margins per shape type and ask invalidateManifold()/refreshManifolds()
//     whether the cached manifold can be kept.
//  3. If not, transform the shape's world bound into heightfield sample space, walk the
//     covered cells (two triangles per cell) and append every non-hole triangle whose cell
//     heights overlap the bound's y-range to stackBasePtr.
//  4. Reserve space in the intermediate arrays and write one ConvexMeshPair for this
//     contact manager.
//
// \param numContactManagers      Number of work items; warps whose index is >= this value do nothing.
// \param toleranceLength         Forwarded to calculatePCMConvexMargin() for non-sphere/capsule shapes.
// \param cmInputs                Work items, read at the warp's contact manager index.
// \param transformCache          Cached transforms, indexed by transformCacheRef0/1 of the work item.
// \param bounds                  Bounds, indexed by transformCacheRef0 (treated as world space).
// \param contactDistance         Contact distances, indexed by transformCacheRef0/1; the two are summed.
// \param gpuShapes               Shapes, indexed by shapeRef0 (shape) and shapeRef1 (heightfield).
// \param cvxTrimeshPair          Output: one ConvexMeshPair is written at the contact manager index.
// \param multiManifolds          Manifold of this contact manager, tested and refreshed in place.
// \param cmOutputs               Not used by this function.
// \param stackBasePtr            Output: uint4 records (cmIdx, triangleIndex, index of the pair inside
//                                this contact manager, shapeRef1). Writes are limited to maxPairs entries.
// \param nbPairsFound            Global counter advanced atomically to reserve slots in stackBasePtr.
// \param midphasePairsNum        Global counter advanced by the number of pairs kept for this contact manager.
// \param midphasePairsNumPadded  Global counter advanced by twice that number rounded up to a multiple of 4.
// \param s_warpScratch           Scratch record written by lane 0 and read by all lanes after __syncwarp().
// \param maxPairs                Capacity of stackBasePtr in pairs.
template<unsigned int WarpsPerBlock>
__device__ static inline void heightfieldMidphaseCore(
	PxU32 numContactManagers,
	const PxReal toleranceLength,
	const PxgContactManagerInput* PX_RESTRICT cmInputs,
	const PxsCachedTransform* PX_RESTRICT transformCache,
	const PxBounds3* PX_RESTRICT bounds,
	const PxReal* PX_RESTRICT contactDistance,
	const PxgShape* PX_RESTRICT gpuShapes,

	ConvexMeshPair* PX_RESTRICT cvxTrimeshPair,
	PxgPersistentContactMultiManifold* PX_RESTRICT multiManifolds,
	PxsContactManagerOutput * PX_RESTRICT cmOutputs,
	uint4* PX_RESTRICT stackBasePtr,
	PxU32* PX_RESTRICT nbPairsFound,
	PxU32* PX_RESTRICT midphasePairsNum,
	PxU32* PX_RESTRICT midphasePairsNumPadded,

	HeigtFieldDataScratch* s_warpScratch,
	const PxU32 maxPairs
)
{
	// thread index in warp
	const unsigned int idxInWarp = threadIdx.x;
	// warp index in block
	const unsigned int warpIdx = threadIdx.y;

	// One contact manager per warp. The index does not depend on threadIdx.x, so the
	// early-out below is taken by all lanes that share this warpIdx together.
	const unsigned int cmIdx = warpIdx + blockIdx.x * blockDim.y;

	if (cmIdx >= numContactManagers)
		return;

	// Number of (contact manager, triangle) pairs this warp has kept so far.
	PxU32 nbPairsPerCM = 0;

	PxgContactManagerInput npWorkItem;
	PxgContactManagerInput_ReadWarp(npWorkItem, cmInputs, cmIdx);

	PxsCachedTransform transformCached, heightfieldTransformCached;
	PxsCachedTransform_ReadWarp(transformCached, transformCache + npWorkItem.transformCacheRef0);
	PxsCachedTransform_ReadWarp(heightfieldTransformCached, transformCache + npWorkItem.transformCacheRef1);

	const PxTransform shapeToHeightfieldNoScale = heightfieldTransformCached.transform.transformInv(transformCached.transform);

	// read convex/sphere/capsule shape
	PxgShape shape;
	PxgShape_ReadWarp(shape, gpuShapes + npWorkItem.shapeRef0);

	// Margin and thresholds handed to the manifold validity / refresh tests.
	PxReal ratio, minMargin, breakingThresholdRatio;
	if (shape.type == PxGeometryType::eSPHERE)
	{
		minMargin = shape.scale.scale.x; //sphere radius
		ratio = 0.02f;
		breakingThresholdRatio = 0.05f;
	}
	else if (shape.type == PxGeometryType::eCAPSULE)
	{
		minMargin = shape.scale.scale.y; //capsule radius
		ratio = 0.02f;
		breakingThresholdRatio = 0.05f;
	}
	else
	{
		// The extents are read as the third float4 of the data hullOrMeshPtr points to.
		PxU8* hullPtr = reinterpret_cast<PxU8*>(shape.hullOrMeshPtr);
		const float4 extents4_f = *reinterpret_cast<float4*>(hullPtr + sizeof(float4) * 2);
		minMargin = calculatePCMConvexMargin(extents4_f, shape.scale.scale, toleranceLength);
		ratio = 0.2f;
		breakingThresholdRatio = 0.8f;
	}

	const bool invalidate = invalidateManifold(shapeToHeightfieldNoScale, multiManifolds[cmIdx], minMargin, ratio);

	// The refresh is only attempted when the manifold was not invalidated.
	bool lostContacts = false;
	if (!invalidate)
	{
		const PxReal projectBreakingThreshold = minMargin * breakingThresholdRatio;

		lostContacts = refreshManifolds(
			shapeToHeightfieldNoScale,
			projectBreakingThreshold,
			multiManifolds + cmIdx
		);
	}

	const bool fullContactGen = invalidate || lostContacts;

	//read heightfield shape
	PxgShape heightFieldShape;
	PxgShape_ReadWarp(heightFieldShape, gpuShapes + npWorkItem.shapeRef1);

	// Lane 0 publishes the values needed for the ConvexMeshPair; __syncwarp() orders the
	// stores before the reads at the end of the function.
	if (idxInWarp == 0)
	{
		s_warpScratch->convexToHeightfieldNoScale = shapeToHeightfieldNoScale;
		s_warpScratch->convexShape_materialIndex = shape.materialIndex;
		s_warpScratch->heightfieldShape_materialIndex = heightFieldShape.materialIndex;
	}

	__syncwarp();

	// Collect candidate triangles only if the cached manifold cannot be reused.
	if (fullContactGen)
	{
		// Layout as read here: two PxU32 (row count, column count) followed by the samples.
		PxU32* heightfieldData = reinterpret_cast<PxU32*>(heightFieldShape.hullOrMeshPtr);
		const PxU32 nbRows = heightfieldData[0];
		const PxU32 nbCols = heightfieldData[1];
		PxHeightFieldSample* samples = reinterpret_cast<PxHeightFieldSample*>(&heightfieldData[2]);

		const PxReal oneOverHeightScale = 1.f / heightFieldShape.scale.scale.y;
		const PxReal oneOverRowScale = 1.f / PxAbs(heightFieldShape.scale.scale.x);
		const PxReal oneOverlColScale = 1.f / PxAbs(heightFieldShape.scale.scale.z);

		PxBounds3 worldBound;
		PxBounds3_ReadWarp(worldBound, bounds + npWorkItem.transformCacheRef0);

		//bound is in world space, we need to transform the bound to the local space of height field
		PxBounds3 localBound = PxBounds3::transformFast(heightfieldTransformCached.transform.getInverse(), worldBound);
		const PxReal contactDist = contactDistance[npWorkItem.transformCacheRef0] + contactDistance[npWorkItem.transformCacheRef1];
		localBound.fattenFast(contactDist);

		// Convert from heightfield local units into sample (row, height, column) units.
		localBound.minimum.x *= oneOverRowScale;
		localBound.minimum.y *= oneOverHeightScale;
		localBound.minimum.z *= oneOverlColScale;

		localBound.maximum.x *= oneOverRowScale;
		localBound.maximum.y *= oneOverHeightScale;
		localBound.maximum.z *= oneOverlColScale;

		//row scale: swap min and max for a negative row scale
		if (heightFieldShape.scale.scale.x < 0.f)
		{
			const PxReal temp = localBound.minimum.x;
			localBound.minimum.x = localBound.maximum.x;
			localBound.maximum.x = temp;
		}

		//col scale: swap min and max for a negative column scale
		if (heightFieldShape.scale.scale.z < 0.f)
		{
			const PxReal temp = localBound.minimum.z;
			localBound.minimum.z = localBound.maximum.z;
			localBound.maximum.z = temp;
		}

		// this tests if the complete shape is outside of the bounds of the HF in the XZ plane.
		const bool boundsDontOverlap =
			(localBound.minimum.x > nbRows - 1) || (localBound.minimum.z > nbCols - 1)
			|| (localBound.maximum.x < 0) || (localBound.maximum.z < 0);

		if (!boundsDontOverlap)
		{
			const PxU32 minRow = getMinRow(localBound.minimum.x, nbRows);
			const PxU32 maxRow = getMaxRow(localBound.maximum.x, nbRows);
			const PxU32 minColumn = getMinColumn(localBound.minimum.z, nbCols);
			const PxU32 maxColumn = getMaxColumn(localBound.maximum.z, nbCols);

			// AD: This test whether we have any triangles at all.
			// Given the clamping above I can only see this happening if we have a
			// flat shape that lies exactly on one of the grid lines for sampling.

			// Also, the 2x looks unnecessary here if my basic math isn't failing me.
			// This is the same code as CPU and has been there since 2009, probably before,
			// so I'm not going to change it now.
			const bool noTriangles = (2 * (maxColumn - minColumn) * (maxRow - minRow)) == 0;

			if (!noTriangles)
			{
				const PxReal miny = localBound.minimum.y;
				const PxReal maxy = localBound.maximum.y;

				const PxU32 columnSpan = maxColumn - minColumn;

				//we have two materials corresponding to one vertexIndex, so each thread will deal with one of the materials
				const PxU32 totalNumProcessed = (maxRow - minRow) * columnSpan * 2;

				// Every lane runs every iteration, including lanes past the end of the work
				// list, so that all lanes reach the FULL_MASK ballot/shuffle calls below.
				for (PxU32 i = 0; i < totalNumProcessed; i += WARP_SIZE)
				{
					bool result = false;
					PxU32 triangleIndex = 0xFFffFFff;

					const PxU32 workIndex = idxInWarp + i;

					if (workIndex < totalNumProcessed)
					{
						// Two consecutive work indices map to the same cell.
						const PxU32 index = workIndex / 2;
						const PxU32 vertexIndex = (minRow + index / columnSpan) * nbCols + (minColumn + index % columnSpan);
						assert(isValidVertex(vertexIndex, nbRows, nbCols));

						// Heights at the four corners of the cell.
						const PxReal h0 = getHeight(vertexIndex, samples);
						const PxReal h1 = getHeight(vertexIndex + 1, samples);
						const PxReal h2 = getHeight(vertexIndex + nbCols, samples);
						const PxReal h3 = getHeight(vertexIndex + nbCols + 1, samples);

						// Reject the cell if the bound lies entirely below or entirely above all four corners.
						const bool con0 = maxy < h0 && maxy < h1 && maxy < h2 && maxy < h3;
						const bool con1 = miny > h0 && miny > h1 && miny > h2 && miny > h3;

						if (!(con0 || con1))
						{
							const PxHeightFieldSample& sample = getSample(vertexIndex, samples);

							// Odd work indices take the cell's second triangle/material, even ones the first.
							const bool isMaterial1 = (workIndex & 1) != 0;
							const PxU32 material = isMaterial1 ? sample.materialIndex1 : sample.materialIndex0;
							if (material != PxHeightFieldMaterial::eHOLE)
							{
								triangleIndex = isMaterial1 ? ((vertexIndex << 1) + 1) : (vertexIndex << 1);
								result = true;
							}
						}
					}

					// Compact the accepted lanes: offset is this lane's slot among them.
					const PxU32 resultWarp = __ballot_sync(FULL_MASK, result);
					const PxU32 offset = warpScanExclusive(resultWarp, idxInWarp);
					PxU32 validCount = __popc(resultWarp);

					// Allocate only amount of memory, needed for single warp-wide write
					PxU32 prevNbPairs = 0xFFffFFff;
					if (idxInWarp == 0 && validCount > 0)
					{
						prevNbPairs = atomicAdd(nbPairsFound, validCount);
					}

					prevNbPairs = __shfl_sync(FULL_MASK, prevNbPairs, 0);

					// Entries that would land at or beyond maxPairs are dropped.
					if (result && (prevNbPairs + offset) < maxPairs)
					{
						stackBasePtr[prevNbPairs + offset] = make_uint4(cmIdx, triangleIndex, nbPairsPerCM + offset, npWorkItem.shapeRef1);
					}

					// Count only the entries that fit below maxPairs (zero when the
					// reservation already started at or past maxPairs).
					if ((validCount > 0) && ((validCount + prevNbPairs) >= maxPairs))
					{
						validCount = PxMax(maxPairs, prevNbPairs) - prevNbPairs;
					}

					assert(((validCount + prevNbPairs) <= maxPairs) || (validCount == 0));

					nbPairsPerCM += validCount;
				}
			} // noTriangles
		} // boundsDontOverlap
	}

	// Lane 0 reserves the ranges for this contact manager; the offsets stay 0xFFffFFff
	// when no pairs were kept.
	PxU32 prevIntermArraysOffset = 0xFFffFFff;
	PxU32 prevIntermArraysPaddedOffset = 0xFFffFFff;
	if (idxInWarp == 0 && nbPairsPerCM > 0)
	{
		prevIntermArraysOffset = atomicAdd(midphasePairsNum, nbPairsPerCM);
		prevIntermArraysPaddedOffset = atomicAdd(midphasePairsNumPadded, ((nbPairsPerCM + 3)&(~3)) * 2); // AD: we need 2x space for the radix sort.
	}

	prevIntermArraysOffset = __shfl_sync(FULL_MASK, prevIntermArraysOffset, 0);
	prevIntermArraysPaddedOffset = __shfl_sync(FULL_MASK, prevIntermArraysPaddedOffset, 0);

	ConvexMeshPair pairInfo;
	pairInfo.aToB = s_warpScratch->convexToHeightfieldNoScale;
	pairInfo.cmIndex = cmIdx;
	pairInfo.startIndex = prevIntermArraysOffset;
	// CONVEX_TRIMESH_CACHED is stored as the count when the cached manifold is reused.
	pairInfo.count = fullContactGen ? nbPairsPerCM : CONVEX_TRIMESH_CACHED;
	pairInfo.roundedStartIndex = prevIntermArraysPaddedOffset;
	pairInfo.materialIndices = make_uint2(s_warpScratch->convexShape_materialIndex, s_warpScratch->heightfieldShape_materialIndex);

	ConvexMeshPair_WriteWarp(cvxTrimeshPair + cmIdx, pairInfo);

	assert(*midphasePairsNum <= maxPairs);
}
|
|
|
|
// Kernel entry point for the shape-vs-heightfield midphase.
//
// Forwards all arguments to heightfieldMidphaseCore(), which indexes threads as
// threadIdx.x = lane in warp and threadIdx.y = warp in block, one contact manager per warp.
// NOTE(review): this implies a launch with blockDim.y <= MIDPHASE_WARPS_PER_BLOCK (the
// scratch array below is indexed by threadIdx.y) and presumably blockDim.x == WARP_SIZE -
// confirm against the host launch code.
//
// \param numContactManagers      Number of contact managers to process.
// \param toleranceLength         Forwarded to the core function.
// \param cmInputs                Contact manager work items.
// \param transformCache          Cached transforms.
// \param bounds                  Bounds array.
// \param contactDistance         Contact distance array.
// \param gpuShapes               Shape array.
// \param cvxTrimeshPair          Output pair records, one per contact manager.
// \param multiManifolds          Manifolds, one per contact manager.
// \param cmOutputs               Forwarded to the core function.
// \param stackPtr                Byte buffer reinterpreted as the uint4 pair stack.
// \param stackOffset             Global counter passed to the core function as its nbPairsFound argument.
// \param midphasePairsNum        Global pair counter.
// \param midphasePairsNumPadded  Global padded pair counter.
// \param stackSizeBytes          Size used by calculateMaxPairs() to derive the pair capacity.
extern "C" __global__ void convexHeightFieldMidphase(
	PxU32 numContactManagers,
	const PxReal toleranceLength,
	const PxgContactManagerInput* PX_RESTRICT cmInputs,
	const PxsCachedTransform* PX_RESTRICT transformCache,
	const PxBounds3* PX_RESTRICT bounds,
	const PxReal* PX_RESTRICT contactDistance,
	const PxgShape* PX_RESTRICT gpuShapes,

	ConvexMeshPair* PX_RESTRICT cvxTrimeshPair,
	PxgPersistentContactMultiManifold* PX_RESTRICT multiManifolds,
	PxsContactManagerOutput* PX_RESTRICT cmOutputs,
	PxU8* PX_RESTRICT stackPtr,
	PxU32* PX_RESTRICT stackOffset,
	PxU32* PX_RESTRICT midphasePairsNum,
	PxU32* PX_RESTRICT midphasePairsNumPadded,
	const PxU32 stackSizeBytes
)
{
	// One scratch row per warp. The rows are reinterpreted as HeigtFieldDataScratch, which is
	// declared with 16-byte alignment, so the array is aligned explicitly instead of relying
	// on the 4-byte alignment of PxU32.
	// NOTE(review): each row is WARP_SIZE * 2 PxU32 words; this keeps every row 16-byte
	// aligned as long as that size is a multiple of 16 bytes - confirm WARP_SIZE.
	__shared__ __align__(16) PxU32 scratchMem[MIDPHASE_WARPS_PER_BLOCK][WARP_SIZE * 2];

	const PxU32 maxPairs = calculateMaxPairs(stackSizeBytes, numContactManagers);

	HeigtFieldDataScratch* warpScratch = reinterpret_cast<HeigtFieldDataScratch*>(scratchMem[threadIdx.y]);

	heightfieldMidphaseCore<MIDPHASE_WARPS_PER_BLOCK>(
		numContactManagers,
		toleranceLength,
		cmInputs,
		transformCache,
		bounds,
		contactDistance,
		gpuShapes,

		cvxTrimeshPair,
		multiManifolds,
		cmOutputs,
		reinterpret_cast<uint4*>(stackPtr),
		stackOffset,
		midphasePairsNum,
		midphasePairsNumPadded,

		warpScratch,
		maxPairs
	);
}
|