// File: XCEngine/engine/third_party/physx/source/gpunarrowphase/src/CUDA/convexHFMidphase.cu
// (file-viewer listing header: 395 lines, 14 KiB, plaintext)

// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
#include "foundation/PxBounds3.h"
#include "foundation/PxSimpleTypes.h"
#include "foundation/PxTransform.h"
#include "geometry/PxGeometry.h"
#include "geometry/PxHeightFieldSample.h"
#include "PxgContactManager.h"
#include "PxgNpKernelIndices.h"
#include "PxgPersistentContactManifold.h"
#include "PxgSimulationCoreDesc.h"
#include "PxsContactManagerState.h"
#include "PxsTransformCache.h"
#include "convexNpCommon.h"
#include "cudaNpCommon.h"
#include "PxgCommonDefines.h"
#include "dataReadWriteHelper.cuh"
#include "heightfieldUtil.cuh"
#include "manifold.cuh"
#include "midphaseAllocate.cuh"
#include <vector_types.h>
using namespace physx;
// Intentionally empty host-side function exported with C linkage.
// NOTE(review): presumably exists only so host code can reference a symbol from this
// translation unit and keep its kernels linked in - confirm against the caller.
extern "C" __host__ void initNarrowphaseKernels0()
{
}
// Per-warp scratch record used by heightfieldMidphaseCore: lane 0 fills it in and every
// lane reads it back when the ConvexMeshPair for the contact manager is assembled.
// Declared with the PX_ALIGN_PREFIX/PX_ALIGN_SUFFIX macros with an argument of 16.
// (The identifier is spelled "Heigt" in the code base; it is kept as-is for callers.)
PX_ALIGN_PREFIX(16)
struct HeigtFieldDataScratch
{
// Pose of shape 0 (sphere/capsule/convex) expressed relative to the height field's
// cached transform (heightfield.transformInv(shape0)).
PxTransform convexToHeightfieldNoScale;
// materialIndex copied from shape 0 of the pair.
PxU32 convexShape_materialIndex;
// materialIndex copied from the height field shape (shape 1 of the pair).
PxU32 heightfieldShape_materialIndex;
}PX_ALIGN_SUFFIX(16);
// Midphase for one shape (sphere, capsule or convex hull) against a height field.
//
// One warp processes one contact manager: threadIdx.x is used as the lane index and
// threadIdx.y as the warp index inside the block. All vote/shuffle intrinsics use FULL_MASK.
// NOTE(review): this presumably requires blockDim.x == WARP_SIZE - confirm against the launch code.
//
// Steps:
//  1. Read the pair's work item, both cached transforms and shape 0.
//  2. Pick margin/ratio constants per geometry type, then either invalidate the cached
//     multi-manifold or refresh it; full contact generation is needed if the manifold was
//     invalidated or lost contacts during refresh.
//  3. For full contact generation, bring shape 0's world bounds into height field sample space
//     and, for every cell in the overlapped row/column range, emit up to two triangle candidates
//     (one per material slot) into stackBasePtr.
//  4. Reserve output ranges in midphasePairsNum / midphasePairsNumPadded and write the
//     ConvexMeshPair for this contact manager.
//
// Parameters:
//  numContactManagers      number of work items in cmInputs; warps with cmIdx beyond it do nothing.
//  toleranceLength         forwarded to calculatePCMConvexMargin for convex hulls.
//  cmInputs                per-pair shape and transform cache references.
//  transformCache          cached transforms, indexed by transformCacheRef0/1.
//  bounds                  world-space bounds, indexed by transformCacheRef0.
//  contactDistance         per-shape contact distances, indexed by transformCacheRef0/1.
//  gpuShapes               shape descriptors, indexed by shapeRef0/1.
//  cvxTrimeshPair          output, one ConvexMeshPair per contact manager.
//  multiManifolds          persistent manifolds, one per contact manager.
//  cmOutputs               not referenced in this function.
//  stackBasePtr            output array of (cmIdx, triangleIndex, index within CM, shapeRef1).
//  nbPairsFound            global counter used to allocate slots in stackBasePtr.
//  midphasePairsNum        global counter of stored pairs.
//  midphasePairsNumPadded  global counter of padded storage (see below).
//  s_warpScratch           per-warp scratch record (written by lane 0, read by all lanes).
//  maxPairs                capacity of stackBasePtr in entries.
//
// The WarpsPerBlock template argument is not referenced in the body.
template<unsigned int WarpsPerBlock>
__device__ static inline void heightfieldMidphaseCore(
	PxU32 numContactManagers,
	const PxReal toleranceLength,
	const PxgContactManagerInput* PX_RESTRICT cmInputs,
	const PxsCachedTransform* PX_RESTRICT transformCache,
	const PxBounds3* PX_RESTRICT bounds,
	const PxReal* PX_RESTRICT contactDistance,
	const PxgShape* PX_RESTRICT gpuShapes,
	ConvexMeshPair* PX_RESTRICT cvxTrimeshPair,
	PxgPersistentContactMultiManifold* PX_RESTRICT multiManifolds,
	PxsContactManagerOutput * PX_RESTRICT cmOutputs,
	uint4* PX_RESTRICT stackBasePtr,
	PxU32* PX_RESTRICT nbPairsFound,
	PxU32* PX_RESTRICT midphasePairsNum,
	PxU32* PX_RESTRICT midphasePairsNumPadded,
	HeigtFieldDataScratch* s_warpScratch,
	const PxU32 maxPairs
)
{
	// Lane index within the warp.
	const unsigned int idxInWarp = threadIdx.x;
	// Warp index within the block.
	const unsigned int warpIdx = threadIdx.y;
	unsigned int cmIdx = warpIdx + blockIdx.x * blockDim.y;

	// Number of triangle pairs stored for this contact manager (clamped to what fit in the stack).
	PxU32 nbPairsPerCM = 0;

	if (cmIdx < numContactManagers)
	{
		PxgContactManagerInput npWorkItem;
		PxgContactManagerInput_ReadWarp(npWorkItem, cmInputs, cmIdx);

		PxsCachedTransform transformCached, heightfieldTransformCached;
		PxsCachedTransform_ReadWarp(transformCached, transformCache + npWorkItem.transformCacheRef0);
		PxsCachedTransform_ReadWarp(heightfieldTransformCached, transformCache + npWorkItem.transformCacheRef1);

		const PxTransform shapeToHeightfieldNoScale = heightfieldTransformCached.transform.transformInv(transformCached.transform);

		// Read shape 0 (sphere, capsule or convex hull).
		PxgShape shape;
		PxgShape_ReadWarp(shape, gpuShapes + npWorkItem.shapeRef0);

		PxReal ratio, minMargin, breakingThresholdRatio;
		if (shape.type == PxGeometryType::eSPHERE)
		{
			minMargin = shape.scale.scale.x; //sphere radius
			ratio = 0.02f;
			breakingThresholdRatio = 0.05f;
		}
		else if (shape.type == PxGeometryType::eCAPSULE)
		{
			minMargin = shape.scale.scale.y; //capsule radius
			ratio = 0.02f;
			breakingThresholdRatio = 0.05f;
		}
		else
		{
			// Convex hull: the extents are read as the third float4 of the hull data.
			PxU8* hullPtr = reinterpret_cast<PxU8*>(shape.hullOrMeshPtr);
			const float4 extents4_f = *reinterpret_cast<float4*>(hullPtr + sizeof(float4) * 2);
			minMargin = calculatePCMConvexMargin(extents4_f, shape.scale.scale, toleranceLength);
			ratio = 0.2f;
			breakingThresholdRatio = 0.8f;
		}

		bool lostContacts = false;
		const bool invalidate = invalidateManifold(shapeToHeightfieldNoScale, multiManifolds[cmIdx], minMargin, ratio);
		if (!invalidate)
		{
			const PxReal projectBreakingThreshold = minMargin * breakingThresholdRatio;
			lostContacts = refreshManifolds(
				shapeToHeightfieldNoScale,
				projectBreakingThreshold,
				multiManifolds + cmIdx
			);
		}

		bool fullContactGen = invalidate || lostContacts;

		// Read the height field shape (shape 1).
		PxgShape heightFieldShape;
		PxgShape_ReadWarp(heightFieldShape, gpuShapes + npWorkItem.shapeRef1);

		// Lane 0 publishes the per-pair data; __syncwarp() makes it visible to the other lanes
		// before they read it back further down.
		if (idxInWarp == 0)
		{
			s_warpScratch->convexToHeightfieldNoScale = shapeToHeightfieldNoScale;
			s_warpScratch->convexShape_materialIndex = shape.materialIndex;
			s_warpScratch->heightfieldShape_materialIndex = heightFieldShape.materialIndex;
		}
		__syncwarp();

		// Collect triangles only when the manifold was invalidated or lost contacts.
		if (fullContactGen)
		{
			// Height field data starts with nbRows, nbCols, followed by the samples.
			PxU32* heightfieldData = reinterpret_cast<PxU32*>(heightFieldShape.hullOrMeshPtr);
			const PxU32 nbRows = heightfieldData[0];
			const PxU32 nbCols = heightfieldData[1];
			PxHeightFieldSample* samples = reinterpret_cast<PxHeightFieldSample*>(&heightfieldData[2]);

			const PxReal oneOverHeightScale = 1.f / heightFieldShape.scale.scale.y;
			const PxReal oneOverRowScale = 1.f / PxAbs(heightFieldShape.scale.scale.x);
			const PxReal oneOverlColScale = 1.f / PxAbs(heightFieldShape.scale.scale.z);

			PxBounds3 worldBound;
			PxBounds3_ReadWarp(worldBound, bounds + npWorkItem.transformCacheRef0);

			// The bound is in world space; move it into the local space of the height field.
			PxBounds3 localBound = PxBounds3::transformFast(heightfieldTransformCached.transform.getInverse(), worldBound);

			const PxReal contactDist = contactDistance[npWorkItem.transformCacheRef0] + contactDistance[npWorkItem.transformCacheRef1];
			localBound.fattenFast(contactDist);

			// Divide out the height field scale so x/z are in row/column units.
			localBound.minimum.x *= oneOverRowScale;
			localBound.minimum.y *= oneOverHeightScale;
			localBound.minimum.z *= oneOverlColScale;
			localBound.maximum.x *= oneOverRowScale;
			localBound.maximum.y *= oneOverHeightScale;
			localBound.maximum.z *= oneOverlColScale;

			// Negative row scale: swap min and max along x.
			if (heightFieldShape.scale.scale.x < 0.f)
			{
				const PxReal temp = localBound.minimum.x;
				localBound.minimum.x = localBound.maximum.x;
				localBound.maximum.x = temp;
			}

			// Negative column scale: swap min and max along z.
			if (heightFieldShape.scale.scale.z < 0.f)
			{
				PxReal swap = localBound.minimum.z;
				localBound.minimum.z = localBound.maximum.z;
				localBound.maximum.z = swap;
			}

			bool boundsDontOverlap = false;
			// Tests whether the complete shape is outside the bounds of the height field in the XZ plane.
			if ((localBound.minimum.x > nbRows - 1) || (localBound.minimum.z > nbCols - 1)
				|| (localBound.maximum.x < 0) || (localBound.maximum.z < 0))
			{
				boundsDontOverlap = true;
			}

			if (!boundsDontOverlap)
			{
				PxU32 minRow = getMinRow(localBound.minimum.x, nbRows);
				PxU32 maxRow = getMaxRow(localBound.maximum.x, nbRows);
				PxU32 minColumn = getMinColumn(localBound.minimum.z, nbCols);
				PxU32 maxColumn = getMaxColumn(localBound.maximum.z, nbCols);

				bool noTriangles = false;
				// AD: This tests whether we have any triangles at all.
				// Given the clamping above I can only see this happening if we have a
				// flat shape that lies exactly on one of the grid lines for sampling.
				// Also, the 2x looks unnecessary here if my basic math isn't failing me.
				// This is the same code as CPU and has been there since 2009, probably before,
				// so I'm not going to change it now.
				if ((2 * (maxColumn - minColumn) * (maxRow - minRow)) == 0)
				{
					noTriangles = true;
				}

				if (!noTriangles)
				{
					const PxReal miny = localBound.minimum.y;
					const PxReal maxy = localBound.maximum.y;
					const PxU32 columnSpan = maxColumn - minColumn;

					// Each cell (vertexIndex) carries two materials, one per triangle, so two
					// consecutive work indices map to the same cell and each handles one material.
					const PxU32 totalNumProcessed = (maxRow - minRow) * columnSpan * 2;

					// Every lane runs every iteration (including the tail) so that the
					// FULL_MASK ballot/shuffle below is executed by all lanes.
					for (PxU32 i = 0; i < totalNumProcessed; i += WARP_SIZE)
					{
						bool result = false;
						PxU32 triangleIndex = 0xFFffFFff;
						const PxU32 workIndex = idxInWarp + i;

						if (workIndex < totalNumProcessed)
						{
							const PxU32 index = workIndex / 2;
							const PxU32 vertexIndex = (minRow + index / columnSpan) * nbCols + (minColumn + index % columnSpan);
							assert(isValidVertex(vertexIndex, nbRows, nbCols));

							// Heights at the four corners of the cell.
							PxReal h0 = getHeight(vertexIndex, samples);
							PxReal h1 = getHeight(vertexIndex + 1, samples);
							PxReal h2 = getHeight(vertexIndex + nbCols, samples);
							PxReal h3 = getHeight(vertexIndex + nbCols + 1, samples);

							// con0: the bound's max y is below all four corners.
							// con1: the bound's min y is above all four corners.
							const bool con0 = maxy < h0 && maxy < h1 && maxy < h2 && maxy < h3;
							const bool con1 = miny > h0 && miny > h1 && miny > h2 && miny > h3;

							if (!(con0 || con1))
							{
								const PxHeightFieldSample& sample = getSample(vertexIndex, samples);
								const bool isMaterial1 = (workIndex & 1) ? 1 : 0;
								PxU32 material = isMaterial1 ? sample.materialIndex1 : sample.materialIndex0;

								// Triangles whose material is the hole marker are skipped.
								if (material != PxHeightFieldMaterial::eHOLE)
								{
									triangleIndex = isMaterial1 ? ((vertexIndex << 1) + 1) : (vertexIndex << 1);
									result = true;
								}
							}
						}

						// Compact the hits of this iteration: offset is the lane's position among the hits.
						PxU32 resultWarp = __ballot_sync(FULL_MASK, result);
						PxU32 offset = warpScanExclusive(resultWarp, idxInWarp);
						PxU32 validCount = __popc(resultWarp);

						// Allocate only the amount of memory needed for a single warp-wide write:
						// lane 0 reserves validCount slots and broadcasts the base index.
						PxU32 prevNbPairs = 0xFFffFFff;
						if (idxInWarp == 0 && validCount > 0)
						{
							prevNbPairs = atomicAdd(nbPairsFound, validCount);
						}
						prevNbPairs = __shfl_sync(FULL_MASK, prevNbPairs, 0);

						// Entries that would land beyond maxPairs are dropped.
						if (result && (prevNbPairs + offset) < maxPairs)
						{
							stackBasePtr[prevNbPairs + offset] = make_uint4(cmIdx, triangleIndex, nbPairsPerCM + offset, npWorkItem.shapeRef1);
						}

						// Count only the entries that were actually stored; nbPairsFound itself
						// has still been advanced by the unclamped count.
						if ((validCount > 0) && ((validCount + prevNbPairs) >= maxPairs))
						{
							validCount = PxMax(maxPairs, prevNbPairs) - prevNbPairs;
						}
						assert(((validCount + prevNbPairs) <= maxPairs) || (validCount == 0));

						nbPairsPerCM += validCount;
					}
				} // noTriangles
			} // boundsDontOverlap
		}

		// Reserve this contact manager's range in the intermediate arrays. Both offsets stay
		// 0xFFffFFff when no pairs were stored.
		PxU32 prevIntermArraysOffset = 0xFFffFFff;
		PxU32 prevIntermArraysPaddedOffset = 0xFFffFFff;
		if (idxInWarp == 0 && nbPairsPerCM > 0)
		{
			prevIntermArraysOffset = atomicAdd(midphasePairsNum, nbPairsPerCM);
			// Count rounded up to a multiple of 4. AD: we need 2x space for the radix sort.
			prevIntermArraysPaddedOffset = atomicAdd(midphasePairsNumPadded, ((nbPairsPerCM + 3)&(~3)) * 2);
		}
		prevIntermArraysOffset = __shfl_sync(FULL_MASK, prevIntermArraysOffset, 0);
		prevIntermArraysPaddedOffset = __shfl_sync(FULL_MASK, prevIntermArraysPaddedOffset, 0);

		ConvexMeshPair pairInfo;
		pairInfo.aToB = s_warpScratch->convexToHeightfieldNoScale;
		pairInfo.cmIndex = cmIdx;
		pairInfo.startIndex = prevIntermArraysOffset;
		// CONVEX_TRIMESH_CACHED is written as the count when no full contact generation was needed.
		pairInfo.count = fullContactGen ? nbPairsPerCM : CONVEX_TRIMESH_CACHED;
		pairInfo.roundedStartIndex = prevIntermArraysPaddedOffset;
		pairInfo.materialIndices = make_uint2(s_warpScratch->convexShape_materialIndex, s_warpScratch->heightfieldShape_materialIndex);

		ConvexMeshPair_WriteWarp(cvxTrimeshPair + cmIdx, pairInfo);

		assert(*midphasePairsNum <= maxPairs);
	}
}
// Kernel entry point for the shape-vs-height-field midphase.
//
// Sets up one scratch row of shared memory per warp (indexed by threadIdx.y), derives the
// capacity of the pair stack from stackSizeBytes via calculateMaxPairs, and forwards everything
// to heightfieldMidphaseCore. stackPtr is reinterpreted as an array of uint4 entries and
// stackOffset is passed as the counter from which stack slots are allocated.
//
// NOTE(review): the shared array has MIDPHASE_WARPS_PER_BLOCK rows, so the launch is presumably
// expected to use blockDim.y <= MIDPHASE_WARPS_PER_BLOCK - confirm against the host launch code.
extern "C" __global__ void convexHeightFieldMidphase(
	PxU32 numContactManagers,
	const PxReal toleranceLength,
	const PxgContactManagerInput* PX_RESTRICT cmInputs,
	const PxsCachedTransform* PX_RESTRICT transformCache,
	const PxBounds3* PX_RESTRICT bounds,
	const PxReal* PX_RESTRICT contactDistance,
	const PxgShape* PX_RESTRICT gpuShapes,
	ConvexMeshPair* PX_RESTRICT cvxTrimeshPair,
	PxgPersistentContactMultiManifold* PX_RESTRICT multiManifolds,
	PxsContactManagerOutput* PX_RESTRICT cmOutputs,
	PxU8* PX_RESTRICT stackPtr,
	PxU32* PX_RESTRICT stackOffset,
	PxU32* PX_RESTRICT midphasePairsNum,
	PxU32* PX_RESTRICT midphasePairsNumPadded,
	const PxU32 stackSizeBytes
)
{
	// Each per-warp row is reinterpreted as a HeigtFieldDataScratch, so the record must fit in a row.
	static_assert(sizeof(HeigtFieldDataScratch) <= sizeof(PxU32) * WARP_SIZE * 2,
		"HeigtFieldDataScratch does not fit in one per-warp scratch row");

	// HeigtFieldDataScratch is declared 16-byte aligned while a PxU32 array only guarantees
	// 4-byte alignment, so the alignment is requested explicitly. A row is WARP_SIZE * 2 PxU32s,
	// which keeps every row on a 16-byte boundary once the base is aligned.
	__shared__ __align__(16) PxU32 scratchMem[MIDPHASE_WARPS_PER_BLOCK][WARP_SIZE * 2];

	const PxU32 maxPairs = calculateMaxPairs(stackSizeBytes, numContactManagers);

	heightfieldMidphaseCore<MIDPHASE_WARPS_PER_BLOCK>(
		numContactManagers,
		toleranceLength,
		cmInputs,
		transformCache,
		bounds,
		contactDistance,
		gpuShapes,
		cvxTrimeshPair,
		multiManifolds,
		cmOutputs,
		reinterpret_cast<uint4*>(stackPtr),
		stackOffset,
		midphasePairsNum,
		midphasePairsNumPadded,
		reinterpret_cast<HeigtFieldDataScratch*>(scratchMem[threadIdx.y]),
		maxPairs
	);
}