Files
XCEngine/engine/third_party/physx/source/gpunarrowphase/src/PxgGeometryManager.cpp

773 lines
24 KiB
C++
Raw Normal View History

// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
#include "PxgGeometryManager.h"
#include "PxsHeapMemoryAllocator.h"
#include "foundation/PxAssert.h"
#include "foundation/PxBasicTemplates.h"
#include "foundation/PxMemory.h"
#include "foundation/PxSimpleTypes.h"
#include "foundation/PxVec3.h"
#include "PxgCopyManager.h"
#include "PxgCudaUtils.h"
#include "PxgHeapMemAllocator.h"
#include "GuBV32.h"
#include "convex/GuConvexMesh.h"
#include "mesh/GuTriangleMesh.h"
#include "hf/GuHeightField.h"
#include "GuSDF.h"
#include "cudaNpCommon.h"
#include "cudamanager/PxCudaContextManager.h"
#include "cudamanager/PxCudaContext.h"
#include <vector_types.h>
#include <vector_functions.h>
// AD: PxgGeometryManager manages the CPU/GPU data transfers and the lifetime of collision geometries: Convex Hulls, Trimeshes, SDFs, Heightfields.
using namespace physx;
using namespace Gu;
// forward declarations of static functions

// compute*ByteSize: number of bytes the given geometry occupies in the upload buffer, before
// addGeometryInternal rounds the size up to a multiple of 256.
static PxU64 computeBoxHullByteSize();
static PxU64 computeHullByteSize(const ConvexHullData& hull, PxU32& numPolyVertices);
static PxU64 computeTriMeshByteSize(const TriangleMesh& triMesh);
static PxU64 computeHeightfieldByteSize(const HeightFieldData& hf);

// layOut*: serialize the given geometry into 'mem'. 'mem' must provide at least the number of bytes
// reported by the matching compute*ByteSize function.
static void layOutBoxHull(void* mem);
static void layOutHull(void* mem, const ConvexHullData& hull, PxU32 numPolyVertices);
// Returns the CUDA arrays/texture objects created for the mesh SDF (all zero if the mesh has no SDF).
static PxgMeshTextureData layOutTriMesh(void* mem, const TriangleMesh& triMesh, CUstream stream);
static void layOutHeightfield(void* mem, const HeightFieldData& hf);

// Creates a single-channel 3D CUDA array of the given format/extent, uploads 'data' into it on 'stream'
// and creates a texture object for it at the location 'texture' points to.
template<typename T> static void createTextureObject(CUarray_format format, CUtexObject*& texture, CUarray& cuArray, PxU32 width, PxU32 height, PxU32 depth, const T* data, CUstream stream);
// PxgGeometryManager definitions
// Stores the device and mapped (pinned) heap allocators provided by heapMemoryManager.
// No memory is allocated here: the pinned staging buffer starts out NULL with a requirement of 0 bytes,
// and the box hull index starts at the 0xFFffFFff "not registered" sentinel (see addBoxHull).
PxgGeometryManager::PxgGeometryManager(PxgHeapMemoryAllocatorManager* heapMemoryManager):
mDeviceMemoryAllocator(heapMemoryManager->mDeviceMemoryAllocators),
mPinnedMemoryAllocator(static_cast<PxgHeapMemoryAllocator*>(heapMemoryManager->mMappedMemoryAllocators)),
mPinnedMemoryBasePtr(NULL),
mPinnedHostMemoryRequirements(0),
// the free index provider is handed the geometry array it hands out slots for
mFreeGeometryIndices(mGeometryData),
mBoxHullIdx(0xFFffFFff)
{
}
// Releases the device memory of the box hull (if addBoxHull registered one) and any pinned staging
// buffer that is still allocated.
PxgGeometryManager::~PxgGeometryManager()
{
	const bool hasBoxHull = (mBoxHullIdx != 0xFFFFFFFF);
	if (hasBoxHull)
	{
		void* boxHullMem = reinterpret_cast<void*>(getGeometryDevPtrByIndex(mBoxHullIdx));
		mDeviceMemoryAllocator->deallocate(boxHullMem);
	}

	if (mPinnedMemoryBasePtr != NULL)
	{
		mPinnedMemoryAllocator->deallocate(mPinnedMemoryBasePtr);
		mPinnedMemoryBasePtr = NULL;
	}
}
// Registers a geometry for upload.
// - byteSize:        payload size in bytes; rounded up to a multiple of 256 here.
// - geomPtr:         CPU-side source object, interpreted according to 'type' when the copy is laid out.
// - type:            selects the layout routine used in scheduleCopyHtoD.
// - numPolyVertices: only meaningful for convex hulls.
// Allocates the device memory right away and queues a copy that scheduleCopyHtoD later fills in.
// Returns the index of the geometry slot.
PxU32 PxgGeometryManager::addGeometryInternal(PxU64 byteSize, const void* geomPtr, UploadGeometryType::Enum type, PxU32 numPolyVertices /*= 0*/)
{
	// round up to the next multiple of 256 bytes
	const PxU64 alignedBytes = (byteSize + 255) & ~PxU64(255);

	// the pinned staging buffer must be able to hold every pending geometry
	mPinnedHostMemoryRequirements += alignedBytes;

	const PxU32 slot = mFreeGeometryIndices.getFreeIndex();
	void* const deviceMem = mDeviceMemoryAllocator->allocate(alignedBytes, PxsHeapStats::eNARROWPHASE, PX_FL);

	// position the new entry will take in mScheduledCopies
	const PxU32 pendingCopyIndex = mScheduledCopies.size();

	ScheduledCopyData pendingCopy;
	pendingCopy.mHullOrTrimeshIdx = slot;
	pendingCopy.mGeometryPtr = geomPtr;
	pendingCopy.mType = type;
	pendingCopy.mNumPolyVertices = numPolyVertices;
	// the source address is filled in by scheduleCopyHtoD once the staging memory exists
	pendingCopy.mCopyDesc.dest = reinterpret_cast<size_t>(deviceMem);
	pendingCopy.mCopyDesc.bytes = static_cast<size_t>(alignedBytes);
	mScheduledCopies.pushBack(pendingCopy);

	HullOrMeshData geometryEntry;
	geometryEntry.mDeviceMemPointer = deviceMem;
	geometryEntry.mCopyDescIndex = pendingCopyIndex;

	PX_ASSERT(slot < mGeometryData.size());
	mGeometryData[slot] = geometryEntry;

	return slot;
}
void PxgGeometryManager::addBoxHull()
{
if (mBoxHullIdx != 0xFFffFFff)
return;
PxU64 byteSize = computeBoxHullByteSize();
mBoxHullIdx = addGeometryInternal(byteSize, NULL, UploadGeometryType::eBOXHULL);
}
// Registers a convex hull for upload and returns its geometry index.
PxU32 PxgGeometryManager::addHull(const ConvexHullData& hull)
{
	// summed vertex count over all polygons; computed once here and reused when the hull is laid out
	PxU32 polyVertexCount = 0;
	const PxU64 hullBytes = computeHullByteSize(hull, polyVertexCount);

	return addGeometryInternal(hullBytes, &hull, UploadGeometryType::eCONVEXHULL, polyVertexCount);
}
// Releases the geometry stored at 'idx':
// - cancels its upload if it is still pending (and gives back the pinned staging bytes reserved for it),
// - destroys the SDF textures registered for it, if any,
// - frees its device memory and returns the index to the free index provider.
void PxgGeometryManager::removeGeometry(PxU32 idx)
{
	PX_ASSERT(idx < mGeometryData.size());
	HullOrMeshData geometryToRemove = mGeometryData[idx];

	const PxU32 scheduledCopyIndex = geometryToRemove.mCopyDescIndex;
	if (scheduledCopyIndex != HullOrMeshData::INVALID_COPY_DESC_INDEX)
	{
		PX_ASSERT(scheduledCopyIndex < mScheduledCopies.size());

		// The copy will never be laid out, so stop reserving pinned staging memory for it.
		// Without this the next staging allocation would be larger than what is actually copied.
		const PxU64 pendingBytes = mScheduledCopies[scheduledCopyIndex].mCopyDesc.bytes;
		PX_ASSERT(mPinnedHostMemoryRequirements >= pendingBytes);
		mPinnedHostMemoryRequirements -= pendingBytes;

		const PxU32 lastScheduledCopyHullOrTrimeshIndex = mScheduledCopies.back().mHullOrTrimeshIdx;
		if (lastScheduledCopyHullOrTrimeshIndex != idx)
		{
			// swap-remove: the last pending copy moves into the freed position, so the geometry
			// it belongs to has to be told about its new position.
			mScheduledCopies.replaceWithLast(scheduledCopyIndex);
			PX_ASSERT(lastScheduledCopyHullOrTrimeshIndex < mGeometryData.size());
			mGeometryData[lastScheduledCopyHullOrTrimeshIndex].mCopyDescIndex = scheduledCopyIndex;
		}
		else
		{
			mScheduledCopies.popBack();
		}
	}

	const PxPair<void*const, PxgMeshTextureData>* pair = mMeshToTextureMap.find(geometryToRemove.mDeviceMemPointer);
	if (pair)
	{
		// destroy each texture object before the array it samples from
		cuTexObjectDestroy(pair->second.cuTexRef);
		cuArrayDestroy(pair->second.cuArray);
		if (pair->second.cuArraySubgrids)
		{
			cuTexObjectDestroy(pair->second.cuTexRefSubgrids);
			cuArrayDestroy(pair->second.cuArraySubgrids);
		}
		mMeshToTextureMap.erase(geometryToRemove.mDeviceMemPointer);
	}

	mDeviceMemoryAllocator->deallocate(geometryToRemove.mDeviceMemPointer);
	mFreeGeometryIndices.setFreeIndex(idx);
}
// Lays out every pending geometry in a freshly allocated pinned staging buffer and hands the resulting
// host-to-device copies to the copy manager. The pending list and the staging size requirement are
// reset afterwards; the staging buffer itself is released in resetAfterMemcpyCompleted.
void PxgGeometryManager::scheduleCopyHtoD(PxgCopyManager& copyMan, PxCudaContext& cudaContext, CUstream stream)
{
	// one pinned allocation sized for all pending geometries
	mPinnedMemoryBasePtr = mPinnedMemoryAllocator->allocate(mPinnedHostMemoryRequirements, PxsHeapStats::eNARROWPHASE, PX_FL);

	PxU8* cursor = reinterpret_cast<PxU8*>(mPinnedMemoryBasePtr);

	const PxU32 nbPending = mScheduledCopies.size();
	for (PxU32 i = 0; i < nbPending; ++i)
	{
		ScheduledCopyData& pending = mScheduledCopies[i];

		// carve this geometry's slice out of the staging buffer
		const PxU64 sliceBytes = pending.mCopyDesc.bytes;
		PxU8* const slice = cursor;
		cursor += sliceBytes;

		PX_ASSERT((sliceBytes & 255) == 0);
		PX_ASSERT(((size_t)slice & 255) == 0);

		// serialize the geometry into its slice, depending on type
		switch (pending.mType)
		{
		case UploadGeometryType::eTRIANGLEMESH:
		{
			const TriangleMesh& mesh = *reinterpret_cast<const TriangleMesh*>(pending.mGeometryPtr);
			PxgMeshTextureData sdfTextures = layOutTriMesh(slice, mesh, stream);
			if (sdfTextures.cuArray)
			{
				// remember the textures by device pointer so removeGeometry can destroy them
				PX_ASSERT(pending.mHullOrTrimeshIdx < mGeometryData.size());
				mMeshToTextureMap.insert(mGeometryData[pending.mHullOrTrimeshIdx].mDeviceMemPointer, sdfTextures);
			}
		}
		break;
		case UploadGeometryType::eCONVEXHULL:
		{
			const ConvexHullData& convex = *reinterpret_cast<const ConvexHullData*>(pending.mGeometryPtr);
			layOutHull(slice, convex, pending.mNumPolyVertices);
		}
		break;
		case UploadGeometryType::eHEIGHTFIELD:
		{
			const HeightFieldData& heightfield = *reinterpret_cast<const HeightFieldData*>(pending.mGeometryPtr);
			layOutHeightfield(slice, heightfield);
		}
		break;
		case UploadGeometryType::eBOXHULL:
			layOutBoxHull(slice);
			break;
		default:
			PX_ALWAYS_ASSERT();
		}

		// the copy reads from the device-visible address of the pinned slice
		pending.mCopyDesc.source = reinterpret_cast<size_t>(getMappedDevicePtr(&cudaContext, slice));

		// the geometry no longer has a pending entry in mScheduledCopies
		PX_ASSERT(pending.mHullOrTrimeshIdx < mGeometryData.size());
		mGeometryData[pending.mHullOrTrimeshIdx].mCopyDescIndex = HullOrMeshData::INVALID_COPY_DESC_INDEX;

		copyMan.pushDeferredHtoD(pending.mCopyDesc);
	}

	mScheduledCopies.forceSize_Unsafe(0);
	mPinnedHostMemoryRequirements = 0;
}
// Gives the pinned staging buffer back to its allocator and releases the free geometry indices.
void PxgGeometryManager::resetAfterMemcpyCompleted()
{
	// AD: geometry is practically never uploaded every frame and the sizes differ from upload to upload,
	// so the staging buffer is not kept around. This keeps pinned memory usage under control.
	void* const stagingBuffer = mPinnedMemoryBasePtr;
	mPinnedMemoryBasePtr = NULL;
	if (stagingBuffer != NULL)
		mPinnedMemoryAllocator->deallocate(stagingBuffer);

	mFreeGeometryIndices.releaseFreeIndices();
}
// Returns the device address of the geometry stored at 'idx'.
CUdeviceptr PxgGeometryManager::getGeometryDevPtrByIndex(PxU32 idx) const
{
	PX_ASSERT(idx < mGeometryData.size());

	const HullOrMeshData& entry = mGeometryData[idx];
	return reinterpret_cast<CUdeviceptr>(entry.mDeviceMemPointer);
}
// Returns the device address of the shared box hull. addBoxHull must have been called before.
CUdeviceptr PxgGeometryManager::getBoxHullDevPtr() const
{
	PX_ASSERT(mBoxHullIdx != 0xFFffFFff);

	// the generic accessor performs the range check on the index
	return getGeometryDevPtrByIndex(mBoxHullIdx);
}
// Registers a triangle mesh for upload and returns its geometry index.
PxU32 PxgGeometryManager::addTriMesh(const TriangleMesh& triMesh)
{
	return addGeometryInternal(computeTriMeshByteSize(triMesh), &triMesh, UploadGeometryType::eTRIANGLEMESH);
}
// Registers a heightfield for upload and returns its geometry index.
PxU32 PxgGeometryManager::addHeightfield(const HeightFieldData& hf)
{
	return addGeometryInternal(computeHeightfieldByteSize(hf), &hf, UploadGeometryType::eHEIGHTFIELD);
}
// static functions
// Size in bytes of the box hull as written by layOutBoxHull.
static PxU64 computeBoxHullByteSize()
{
	// topology of a box
	const PxU32 nbVertices = 8;
	const PxU32 nbPolygons = 6;
	const PxU32 nbEdges = 12;
	const PxU32 nbPolyVertices = 24;	// 6 polygons with 4 vertices each

	PxU64 total = 0;
	total += sizeof(float4);						// center of mass
	total += sizeof(PxU32);							// NbEdgesNbHullVerticesNbPolygons
	total += 3 * sizeof(PxU32);						// pad
	total += sizeof(float4);						// extents
	total += sizeof(float4) * nbVertices;			// vertices
	total += sizeof(float4) * nbPolygons;			// planes
	total += sizeof(PxU32) * nbPolygons;			// vRef8NbVertsMinIndex0
	total += sizeof(PxU16) * 2 * nbEdges;			// mVerticesByEdges16
	total += sizeof(PxU8) * 2 * nbEdges;			// mFacesByEdges8
	total += sizeof(PxU8) * 3 * nbVertices;			// mFacesByVertices8
	total += sizeof(PxU8) * nbPolyVertices;			// vertexData8
	return total;
}
// Size in bytes of a convex hull as written by layOutHull.
// numPolyVertices receives the vertex count summed over all polygons of the hull.
static PxU64 computeHullByteSize(const ConvexHullData& hull, PxU32& numPolyVertices)
{
	const PxU32 nbPolygons = hull.mNbPolygons;
	const PxU32 nbVertices = hull.mNbHullVertices;
	const PxU32 nbEdges = hull.mNbEdges;

	PxU32 polyVertexSum = 0;
	for (PxU32 p = 0; p < nbPolygons; ++p)
		polyVertexSum += hull.mPolygons[p].mNbVerts;
	numPolyVertices = polyVertexSum;

	PxU64 total = 0;
	total += sizeof(float4);						// center of mass
	total += sizeof(PxU32);							// NbEdgesNbHullVerticesNbPolygons
	total += 3 * sizeof(PxU32);						// pad
	total += sizeof(float4);						// extents
	total += sizeof(float4) * nbVertices;			// vertices
	total += sizeof(float4) * nbPolygons;			// planes
	total += sizeof(PxU32) * nbPolygons;			// vRef8NbVertsMinIndex0
	total += sizeof(PxU16) * 2 * nbEdges;			// mVerticesByEdges16
	total += sizeof(PxU8) * 2 * nbEdges;			// mFacesByEdges8
	total += sizeof(PxU8) * 3 * nbVertices;			// mFacesByVertices8
	total += sizeof(PxU8) * polyVertexSum;			// vertexData8
	return total;
}
// Size in bytes of a triangle mesh as written by layOutTriMesh: mesh tables, the SDF header
// (plus SDF payload if the mesh has an SDF) and the packed BV32 nodes.
static PxU64 computeTriMeshByteSize(const TriangleMesh& triMesh)
{
	const PxU32 nbTriangles = triMesh.getNbTrianglesFast();
	const PxU32 nbVertices = triMesh.getNbVerticesFast();
	const PxU64 roundTo16Mask = ~PxU64(15);

	PxU64 bytes = sizeof(uint4);					// (nbVerts, nbTris, meshAdjVerticiesTotal, nbBv32TreeNodes)
	bytes += sizeof(float4) * nbVertices;			// meshVerts
	bytes += sizeof(uint4) * nbTriangles;			// meshTriIndices
	bytes += sizeof(uint4) * nbTriangles;			// meshTriAdjacencies
	bytes += sizeof(PxU32) * nbTriangles;			// gpu to cpu remap table
	bytes += sizeof(PxU32) * nbTriangles * 3;		// vertex to triangle remap table
	bytes += sizeof(PxU32) * (nbVertices + 1);		// vertex to triangle offset table

	// keep the SDF section 16-byte aligned
	bytes = (bytes + 15) & roundTo16Mask;

	bytes += sizeof(uint4);							// (sdfDimX, sdfDimY, sdfDimZ, 0)

	const SDF& sdf = triMesh.getSdfDataFast();
	if (sdf.mNumSdfs > 0)
	{
		bytes += sizeof(float4);					// (meshLower.x, meshLower.y, meshLower.z, spacing)
		bytes += sizeof(uint4);
		bytes += sizeof(CUtexObject);				// SDF texture object reference
		bytes += sizeof(CUtexObject);
		bytes += sizeof(PxU32) * sdf.mNumStartSlots;
	}

	// keep the total a multiple of 16 bytes
	bytes = (bytes + 15) & roundTo16Mask;

	//ML: don't know whether we need to have local bound
	bytes += triMesh.mGRB_BV32Tree->mNbPackedNodes * sizeof(BV32DataPacked);
	return bytes;
}
// Size in bytes of a heightfield as written by layOutHeightfield.
static PxU64 computeHeightfieldByteSize(const HeightFieldData& hf)
{
	/* Each height field sample is 32 bits wide:
	   1) a 16 bit signed height value,
	   2) two one byte material indices. The high bit of each byte is reserved for special use,
	      so a material index only has 7 bits.
	      The high bit of material0 is the tess-flag.
	      The high bit of material1 is reserved for future use.
	*/
	const PxU64 headerBytes = sizeof(PxU32)		//rows
							+ sizeof(PxU32);	//columns
	const PxU64 sampleBytes = sizeof(PxU32) * hf.columns * hf.rows;
	const PxU64 flagBytes = sizeof(PxU16);		//PxHeightFieldFlags

	return headerBytes + sampleBytes + flagBytes;
}
static void layOutBoxHull(void* mem)
{
const float4 vertices[8] = { make_float4(-1.f, 1.f, -1.f, 0.f),
make_float4(1.f, 1.f, -1.f, 0.f),
make_float4(1.f, -1.f, -1.f, 0.f),
make_float4(-1.f, -1.f, -1.f, 0.f),
make_float4(-1.f, -1.f, 1.f, 0.f),
make_float4(-1.f, 1.f, 1.f, 0.f),
make_float4(1.f, -1.f, 1.f, 0.f),
make_float4(1.f, 1.f, 1.f, 0.f)
};
const float4 planes[6] = {make_float4(0.f, 0.0f, -1.0f, -1.0f),
make_float4(-1.0f, 0.0f, 0.0f, -1.0f),
make_float4(0.0f, -1.0f, 0.0f, -1.0f),
make_float4(0.0f, 1.0f, 0.0f, -1.0f),
make_float4(0.0f, 0.0f, 1.0f, -1.0f),
make_float4(1.0f, 0.0f, 0.0f, -1.0f)};
const PxU32 polyData[6] = {merge(0, 4, 4),
merge(4, 4, 1),
merge(8, 4, 0),
merge(12, 4, 2),
merge(16, 4, 0),
merge(20, 4, 0)};
const PxU16 vertsByEdges[24] = { 0, 1, 3, 0, 5, 0, 1, 2, 7, 1, 2, 3, 2, 6, 3, 4, 4, 5, 6, 4, 5, 7, 6, 7 };
const PxU8 facesByEdges[24] = {
0, 3,
0, 1,
1, 3,
0, 5,
3, 5,
0, 2,
2, 5,
1, 2,
1, 4,
2, 4,
3, 4,
4, 5
};
const PxU8 facesByVerts[24] = {
0, 1, 3,
0, 3, 5,
0, 2, 5,
0, 1, 2,
1, 2, 4,
1, 3, 4,
2, 4, 5,
3, 4, 5
};
const PxU8 polyIndices[24] = {0, 1, 2, 3,
4, 5, 0, 3,
4, 3, 2, 6,
7, 1, 0, 5,
7, 5, 4, 6,
7, 6, 2, 1
};
PxU8* m = reinterpret_cast<PxU8*>(mem);
*((float4*)m) = make_float4(0.0f, 0.0f, 0.0f, 0.0f);//center of mass
m += sizeof(float4);
*((PxU32*)m) = merge(12, merge((PxU8) 8, (PxU8) 6));
m += 4 * sizeof(PxU32);
*((float4*)m) = make_float4(1.0f, //half-extents and the insphere radius
1.0f,
1.0f,
1.0f);
m += sizeof(float4);
PxMemCopy(m, vertices, sizeof(float4) * 8);
m += sizeof(float4) * 8;
PxMemCopy(m, planes, sizeof(float4) * 6);
m += sizeof(float4) * 6;
PxMemCopy(m, polyData, sizeof(PxU32) * 6);
m += sizeof(PxU32) * 6;
PxMemCopy(m, vertsByEdges, sizeof(PxU16) * 2 * 12);
m += sizeof(PxU16) * 2 * 12;
PxMemCopy(m, facesByEdges, sizeof(PxU8) * 2 * 12);
m += sizeof(PxU8) * 2 * 12;
PxMemCopy(m, facesByVerts, sizeof(PxU8) * 3 * 8);
m += sizeof(PxU8) * 3 * 8;
PxMemCopy(m, polyIndices, sizeof(PxU8) * 24);
}
static void layOutHull(void* mem, const ConvexHullData& hull, PxU32 numPolyVertices)
{
PxU8* m = (PxU8*) mem;
*((float4*)m) = make_float4(hull.mCenterOfMass.x, hull.mCenterOfMass.y, hull.mCenterOfMass.z, 0.f);
m +=sizeof(float4);
*((PxU32*)m) = merge(hull.mNbEdges, merge(hull.mNbHullVertices, hull.mNbPolygons));
m += 4 * sizeof(PxU32);
*((float4*)m) = make_float4(hull.mInternal.mInternalExtents.x,
hull.mInternal.mInternalExtents.y,
hull.mInternal.mInternalExtents.z,
hull.mInternal.mInternalRadius);
m += sizeof(float4);
const PxVec3* verts = hull.getHullVertices();
for (const PxVec3* end = verts + hull.mNbHullVertices; verts < end; ++verts)
{
*((float4*)m) = make_float4(verts->x, verts->y, verts->z, 0);
m += sizeof(float4);
}
const HullPolygonData* polys = hull.mPolygons;
float4* planes = (float4*)m;
PxU32* vRef8NbVertsMinIndex = (PxU32*) (m + sizeof(float4) * hull.mNbPolygons);
for (const HullPolygonData* end = polys + hull.mNbPolygons; polys < end; ++polys)
{
*(planes++) = make_float4(polys->mPlane.n.x, polys->mPlane.n.y, polys->mPlane.n.z, polys->mPlane.d);
*(vRef8NbVertsMinIndex++) = merge(polys->mVRef8, polys->mNbVerts, polys->mMinIndex);
}
m += (sizeof(float4) + sizeof(PxU32)) * hull.mNbPolygons;
PxMemCopy(m, hull.getVerticesByEdges16(), sizeof(PxU16) * 2*hull.mNbEdges);
m += sizeof(PxU16) * 2*hull.mNbEdges;
PxMemCopy(m, hull.getFacesByEdges8(), sizeof(PxU8) * 2*hull.mNbEdges);
m += sizeof(PxU8) * 2*hull.mNbEdges;
PxMemCopy(m, hull.getFacesByVertices8(), sizeof(PxU8) * 3*hull.mNbHullVertices);
m += sizeof(PxU8) * 3*hull.mNbHullVertices;
PxMemCopy(m, hull.getVertexData8(), sizeof(PxU8) * numPolyVertices);
//m += sizeof(PxU8) * numPolyVertices;
}
// Writes 'triMesh' into 'mem' (which must provide computeTriMeshByteSize(triMesh) bytes) and, if the mesh
// has an SDF, creates the CUDA arrays/texture objects for it on 'stream'.
//
// Layout: header, packed BV32 nodes, vertices, triangle indices, adjacencies, GPU-to-CPU face remap,
// vertex-to-triangle offsets, vertex-to-triangle references, [16-byte alignment], SDF header and,
// for meshes with an SDF, the SDF parameters, two texture object handles and the subgrid start slots.
//
// Returns the created arrays/texture objects so the caller can destroy them later. All fields are zero
// if the mesh has no SDF; the subgrid fields are zero if no subgrid texture was created.
static PxgMeshTextureData layOutTriMesh(void* mem, const TriangleMesh& triMesh, CUstream stream)
{
	const PxU32 numTris = triMesh.getNbTrianglesFast();
	const PxU32 numVerts = triMesh.getNbVerticesFast();

	BV32Tree* bv32Tree = triMesh.mGRB_BV32Tree;

	PxU8* m = (PxU8*) mem;
	*((uint4*)m) = make_uint4(numVerts, numTris, 0, bv32Tree->mNbPackedNodes);
	m += sizeof(uint4);

	//Midphase
	PxMemCopy(m, bv32Tree->mPackedNodes, sizeof(BV32DataPacked) * bv32Tree->mNbPackedNodes);
	m += sizeof(BV32DataPacked) * bv32Tree->mNbPackedNodes;
	PX_ASSERT(bv32Tree->mNbPackedNodes > 0);

	// Core: Mesh data
	//construct gpu friendly verts
	float4* grbVerts = reinterpret_cast<float4*>(m);
	const PxVec3* verts = triMesh.getVerticesFast();
	for (PxU32 i = 0; i < numVerts; ++i)
	{
		grbVerts[i].x = verts[i].x;
		grbVerts[i].y = verts[i].y;
		grbVerts[i].z = verts[i].z;
		grbVerts[i].w = 0.f;
	}
	m += numVerts * sizeof(float4);

	//copy triangle indices, widened to one uint4 per triangle
	uint4* grbTriInd = reinterpret_cast<uint4*>(m);
	if (triMesh.has16BitIndices())
	{
		const PxU16* triInds = reinterpret_cast<PxU16*>(triMesh.mGRB_triIndices);
		for (PxU32 i = 0; i < numTris; ++i)
		{
			grbTriInd[i].x = triInds[3 * i + 0];
			grbTriInd[i].y = triInds[3 * i + 1];
			grbTriInd[i].z = triInds[3 * i + 2];
			grbTriInd[i].w = 0;
		}
	}
	else
	{
		const PxU32* triInds = reinterpret_cast<PxU32*>(triMesh.mGRB_triIndices);
		for (PxU32 i = 0; i < numTris; ++i)
		{
			grbTriInd[i].x = triInds[3 * i + 0];
			grbTriInd[i].y = triInds[3 * i + 1];
			grbTriInd[i].z = triInds[3 * i + 2];
			grbTriInd[i].w = 0;
		}
	}
	m += numTris * sizeof(uint4);

	//Adjacencies data for contact gen
	PxMemCopy(m, triMesh.mGRB_triAdjacencies, numTris * sizeof(uint4));
	m += numTris * sizeof(uint4);

	//GPU to CPU remap table
	PxMemCopy(m, triMesh.mGRB_faceRemap, numTris * sizeof(PxU32));
	m += numTris * sizeof(PxU32);

	//vertex to triangle offset table: one entry per vertex plus a final entry holding the total number of references
	PxMemCopy(m, triMesh.mAccumulatedTrianglesRef, numVerts * sizeof(PxU32));
	reinterpret_cast<PxU32*>(m)[numVerts] = triMesh.mNbTrianglesReferences;
	m += (numVerts + 1) * sizeof(PxU32);

	//vertex to triangle remap table
	PxMemCopy(m, triMesh.mTrianglesReferences, (numTris * 3) * sizeof(PxU32));
	m += (numTris * 3) * sizeof(PxU32);

	// Make sure m is 16-byte aligned
	m = (PxU8*)(((size_t)m + 15) & ~15);

	// SDF header: dimensions and the "prefer SDF projection" flag
	const SDF& sdfData = triMesh.getSdfDataFast();
	*((uint4*)m) = make_uint4(sdfData.mDims.x, sdfData.mDims.y, sdfData.mDims.z, triMesh.getPreferSDFProjection() ? 1 : 0);
	m += sizeof(uint4);

	const PxU32 numSdfs = sdfData.mNumSdfs;

	PxgMeshTextureData result;
	PxMemZero(&result, sizeof(result));

	if (numSdfs)
	{
		const PxVec3 meshLower = sdfData.mMeshLower;
		const PxReal spacing = sdfData.mSpacing;
		*((float4*)m) = make_float4(meshLower.x, meshLower.y, meshLower.z, spacing);
		m += sizeof(float4);

		// the min/max SDF values are floats whose bit patterns are stored in the uint4
		*((uint4*)m) = make_uint4(sdfData.mSubgridSize, reinterpret_cast<const PxU32&>(sdfData.mSubgridsMinSdfValue), reinterpret_cast<const PxU32&>(sdfData.mSubgridsMaxSdfValue), sdfData.mNumStartSlots);
		m += sizeof(uint4);

		CUtexObject* pTexture = (CUtexObject*)m;
		m += sizeof(CUtexObject);

		CUtexObject* pTextureSubgrids = (CUtexObject*)m;
		m += sizeof(CUtexObject);

		// The handles are part of the data that gets uploaded. Start from null handles so that no
		// uninitialized bytes are uploaded when a texture is not created below.
		*pTexture = 0;
		*pTextureSubgrids = 0;

		if (sdfData.mSubgridSize > 0)
			PxMemCopy(m, sdfData.mSubgridStartSlots, sdfData.mNumStartSlots * sizeof(PxU32));
		m += sizeof(PxU32) * sdfData.mNumStartSlots;

		CUarray cuArray = 0;
		CUarray cuArraySubgrids = 0;

		if (sdfData.mSubgridSize == 0)
		{
			// dense SDF: a single texture covering the full grid
			createTextureObject<PxReal>(CU_AD_FORMAT_FLOAT, pTexture, cuArray, sdfData.mDims.x, sdfData.mDims.y, sdfData.mDims.z, sdfData.mSdf, stream);
		}
		else
		{
			// sparse SDF: a coarse background texture plus one texture holding all subgrids
			const PxU32 x = sdfData.mDims.x / sdfData.mSubgridSize;
			const PxU32 y = sdfData.mDims.y / sdfData.mSubgridSize;
			const PxU32 z = sdfData.mDims.z / sdfData.mSubgridSize;
			createTextureObject<PxReal>(CU_AD_FORMAT_FLOAT, pTexture, cuArray, x + 1, y + 1, z + 1, sdfData.mSdf, stream);

			// every subgrid block stores (mSubgridSize + 1) samples per axis
			const PxU32 samplesPerBlock = sdfData.mSubgridSize + 1;
			const PxU32 subgridsWidth = sdfData.mSdfSubgrids3DTexBlockDim.x * samplesPerBlock;
			const PxU32 subgridsHeight = sdfData.mSdfSubgrids3DTexBlockDim.y * samplesPerBlock;
			const PxU32 subgridsDepth = sdfData.mSdfSubgrids3DTexBlockDim.z * samplesPerBlock;

			switch (sdfData.mBytesPerSparsePixel)
			{
			case 1:
				createTextureObject<PxU8>(CU_AD_FORMAT_UNSIGNED_INT8, pTextureSubgrids, cuArraySubgrids,
					subgridsWidth, subgridsHeight, subgridsDepth, sdfData.mSubgridSdf, stream);
				break;
			case 2:
				createTextureObject<PxU16>(CU_AD_FORMAT_UNSIGNED_INT16, pTextureSubgrids, cuArraySubgrids,
					subgridsWidth, subgridsHeight, subgridsDepth, reinterpret_cast<PxU16*>(sdfData.mSubgridSdf), stream);
				break;
			case 4:
				createTextureObject<PxReal>(CU_AD_FORMAT_FLOAT, pTextureSubgrids, cuArraySubgrids,
					subgridsWidth, subgridsHeight, subgridsDepth, reinterpret_cast<PxReal*>(sdfData.mSubgridSdf), stream);
				break;
			default:
				// unsupported pixel size: no subgrid texture is created and the handle stays null
				break;
			}
		}

		// createTextureObject takes the handle pointer by reference and may reset it, so the pointer
		// is checked before the handle is read back.
		result.cuArray = cuArray;
		result.cuTexRef = (cuArray && pTexture) ? *pTexture : 0;
		result.cuArraySubgrids = cuArraySubgrids;
		result.cuTexRefSubgrids = (cuArraySubgrids && pTextureSubgrids) ? *pTextureSubgrids : 0;
	}

	return result;
}
static void layOutHeightfield(void* mem, const HeightFieldData& hf)
{
PxU8* m = (PxU8*) mem;
PxU32 nbRows = hf.rows;
PxU32 nbCols = hf.columns;
*((PxU32* ) m) = nbRows;
m += sizeof(PxU32);
*((PxU32* ) m) = nbCols;
m += sizeof(PxU32);
PX_ASSERT(sizeof(PxU32) == sizeof(PxHeightFieldSample));
PxMemCopy(m, hf.samples, sizeof(PxU32) * nbRows * nbCols);
m += sizeof(PxU32) * nbRows * nbCols;
*((PxU16*)m) = hf.flags;
}
// Creates a single-channel 3D CUDA array of 'width' x 'height' x 'depth' elements of the given format,
// queues the upload of 'data' (tightly packed host memory) on 'stream' and creates a texture object with
// clamped addressing and linear filtering for the array.
//
// texture: location that receives the texture object handle. The pointer itself is never modified.
// cuArray: receives the created array.
//
// On a zero-sized extent or when array creation fails, both cuArray and *texture are set to 0.
// If only the texture object creation fails, *texture is 0 and cuArray stays valid, so the caller
// remains responsible for destroying it.
template<typename T>
static void createTextureObject(CUarray_format format, CUtexObject*& texture, CUarray& cuArray, PxU32 width, PxU32 height, PxU32 depth, const T* data, CUstream stream)
{
	PX_ASSERT(texture);

	// Start from well-defined outputs. The handle is written through the pointer: resetting the pointer
	// itself would leave the caller's handle storage untouched and make a later dereference invalid.
	cuArray = 0;
	if (!texture)
		return;
	*texture = 0;

	if (width == 0 || height == 0 || depth == 0)
		return;

	CUDA_ARRAY3D_DESCRIPTOR_st arrDesc;
	arrDesc.Format = format;
	arrDesc.NumChannels = 1;
	arrDesc.Width = width;
	arrDesc.Height = height;
	arrDesc.Depth = depth;
	arrDesc.Flags = 0;

	CUresult r = cuArray3DCreate(&cuArray, &arrDesc);
	PX_UNUSED(r);
	PX_ASSERT(r == CUDA_SUCCESS);
	if (r != CUDA_SUCCESS)
	{
		// without an array there is nothing to copy into or to sample from
		cuArray = 0;
		return;
	}

	CUDA_MEMCPY3D copyParam;
	PxMemZero(&copyParam, sizeof(copyParam));
	copyParam.dstMemoryType = CU_MEMORYTYPE_ARRAY;
	copyParam.dstArray = cuArray;
	copyParam.srcMemoryType = CU_MEMORYTYPE_HOST;
	copyParam.srcHost = data;
	copyParam.srcPitch = width * sizeof(T);
	copyParam.srcHeight = height;
	copyParam.WidthInBytes = copyParam.srcPitch;
	copyParam.Height = height;
	copyParam.Depth = depth;

	r = cuMemcpy3DAsync(&copyParam, stream);
	PX_ASSERT(r == CUDA_SUCCESS);

	CUDA_RESOURCE_DESC resDesc;
	PxMemZero(&resDesc, sizeof(resDesc));
	resDesc.resType = CU_RESOURCE_TYPE_ARRAY;
	resDesc.res.array.hArray = cuArray;

	CUDA_TEXTURE_DESC texDesc;
	PxMemZero(&texDesc, sizeof(texDesc));
	texDesc.addressMode[0] = CU_TR_ADDRESS_MODE_CLAMP;
	texDesc.addressMode[1] = CU_TR_ADDRESS_MODE_CLAMP;
	texDesc.addressMode[2] = CU_TR_ADDRESS_MODE_CLAMP;
	texDesc.filterMode = CU_TR_FILTER_MODE_LINEAR;

	r = cuTexObjectCreate(texture, &resDesc, &texDesc, NULL);
	PX_ASSERT(r == CUDA_SUCCESS);
	if (r != CUDA_SUCCESS)
	{
		// keep the handle well-defined; the array stays alive and is owned by the caller
		*texture = 0;
	}
}