773 lines
24 KiB
C++
773 lines
24 KiB
C++
|
|
// Redistribution and use in source and binary forms, with or without
|
||
|
|
// modification, are permitted provided that the following conditions
|
||
|
|
// are met:
|
||
|
|
// * Redistributions of source code must retain the above copyright
|
||
|
|
// notice, this list of conditions and the following disclaimer.
|
||
|
|
// * Redistributions in binary form must reproduce the above copyright
|
||
|
|
// notice, this list of conditions and the following disclaimer in the
|
||
|
|
// documentation and/or other materials provided with the distribution.
|
||
|
|
// * Neither the name of NVIDIA CORPORATION nor the names of its
|
||
|
|
// contributors may be used to endorse or promote products derived
|
||
|
|
// from this software without specific prior written permission.
|
||
|
|
//
|
||
|
|
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
|
||
|
|
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||
|
|
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||
|
|
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||
|
|
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||
|
|
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||
|
|
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||
|
|
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||
|
|
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||
|
|
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||
|
|
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||
|
|
//
|
||
|
|
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
|
||
|
|
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
|
||
|
|
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
|
||
|
|
|
||
|
|
#include "PxgGeometryManager.h"
|
||
|
|
|
||
|
|
#include "PxsHeapMemoryAllocator.h"
|
||
|
|
#include "foundation/PxAssert.h"
|
||
|
|
#include "foundation/PxBasicTemplates.h"
|
||
|
|
#include "foundation/PxMemory.h"
|
||
|
|
#include "foundation/PxSimpleTypes.h"
|
||
|
|
#include "foundation/PxVec3.h"
|
||
|
|
|
||
|
|
#include "PxgCopyManager.h"
|
||
|
|
#include "PxgCudaUtils.h"
|
||
|
|
#include "PxgHeapMemAllocator.h"
|
||
|
|
|
||
|
|
#include "GuBV32.h"
|
||
|
|
#include "convex/GuConvexMesh.h"
|
||
|
|
#include "mesh/GuTriangleMesh.h"
|
||
|
|
#include "hf/GuHeightField.h"
|
||
|
|
#include "GuSDF.h"
|
||
|
|
#include "cudaNpCommon.h"
|
||
|
|
|
||
|
|
#include "cudamanager/PxCudaContextManager.h"
|
||
|
|
#include "cudamanager/PxCudaContext.h"
|
||
|
|
#include <vector_types.h>
|
||
|
|
#include <vector_functions.h>
|
||
|
|
|
||
|
|
// AD: PxgGeometryManager manages the CPU/GPU data transfers and the lifetime of collision geometries: Convex Hulls, Trimeshes, SDFs, Heightfields.
|
||
|
|
|
||
|
|
using namespace physx;
|
||
|
|
using namespace Gu;
|
||
|
|
|
||
|
|
// forward declarations of static functions
|
||
|
|
// Byte size of the fixed unit-box convex hull blob written by layOutBoxHull().
static PxU64 computeBoxHullByteSize();
|
||
|
|
// Byte size of the blob written by layOutHull(); also outputs the summed per-polygon vertex count.
static PxU64 computeHullByteSize(const ConvexHullData& hull, PxU32& numPolyVertices);
|
||
|
|
// Byte size of the blob written by layOutTriMesh() (mesh data, optional SDF header, BV32 nodes).
static PxU64 computeTriMeshByteSize(const TriangleMesh& triMesh);
|
||
|
|
// Byte size of the blob written by layOutHeightfield() (rows, columns, samples, flags).
static PxU64 computeHeightfieldByteSize(const HeightFieldData& hf);
|
||
|
|
// Writes the hard-coded box hull into 'mem'.
static void layOutBoxHull(void* mem);
|
||
|
|
// Writes 'hull' into 'mem'; numPolyVertices is the value produced by computeHullByteSize().
static void layOutHull(void* mem, const ConvexHullData& hull, PxU32 numPolyVertices);
|
||
|
|
// Writes 'triMesh' into 'mem' and creates the SDF textures (if any); returns the created CUDA handles.
static PxgMeshTextureData layOutTriMesh(void* mem, const TriangleMesh& triMesh, CUstream stream);
|
||
|
|
// Writes the heightfield dimensions, samples and flags into 'mem'.
static void layOutHeightfield(void* mem, const HeightFieldData& hf);
|
||
|
|
// Creates a single-channel 3D CUDA array filled from 'data' plus a texture object viewing it.
template<typename T> static void createTextureObject(CUarray_format format, CUtexObject*& texture, CUarray& cuArray, PxU32 width, PxU32 height, PxU32 depth, const T* data, CUstream stream);
|
||
|
|
|
||
|
|
// PxgGeometryManager definitions
|
||
|
|
// Binds the manager to the device and mapped (pinned) allocators owned by 'heapMemoryManager'.
// No memory is allocated here: the pinned staging buffer starts out empty and the shared box hull
// is marked as "not added yet" (index 0xFFFFFFFF).
PxgGeometryManager::PxgGeometryManager(PxgHeapMemoryAllocatorManager* heapMemoryManager)
	: mDeviceMemoryAllocator(heapMemoryManager->mDeviceMemoryAllocators)
	, mPinnedMemoryAllocator(static_cast<PxgHeapMemoryAllocator*>(heapMemoryManager->mMappedMemoryAllocators))
	, mPinnedMemoryBasePtr(NULL)
	, mPinnedHostMemoryRequirements(0)
	, mFreeGeometryIndices(mGeometryData)
	, mBoxHullIdx(0xFFFFFFFF)
{
}
|
||
|
|
|
||
|
|
// Releases the device memory of the shared box hull (if it was ever added) and any pinned staging
// buffer that is still allocated.
PxgGeometryManager::~PxgGeometryManager()
{
	const bool boxHullWasAdded = (mBoxHullIdx != 0xFFFFFFFF);
	if (boxHullWasAdded)
	{
		const CUdeviceptr boxHullDevPtr = getGeometryDevPtrByIndex(mBoxHullIdx);
		mDeviceMemoryAllocator->deallocate(reinterpret_cast<void*>(boxHullDevPtr));
	}

	if (mPinnedMemoryBasePtr)
	{
		mPinnedMemoryAllocator->deallocate(mPinnedMemoryBasePtr);
		mPinnedMemoryBasePtr = NULL;
	}
}
|
||
|
|
|
||
|
|
// Common path for registering a geometry: reserves a geometry slot, allocates its device memory and
// queues a pending host-to-device copy that scheduleCopyHtoD() will fill in and submit.
//
// byteSize         size of the laid-out geometry; rounded up to a multiple of 256 bytes here
// geomPtr          source object later passed to the layout routine chosen by 'type' (NULL for the box hull)
// type             selects the layout routine in scheduleCopyHtoD()
// numPolyVertices  forwarded to layOutHull(); only passed by addHull()
//
// Returns the index of the geometry slot.
PxU32 PxgGeometryManager::addGeometryInternal(PxU64 byteSize, const void* geomPtr, UploadGeometryType::Enum type, PxU32 numPolyVertices /*= 0*/)
{
	// Every geometry occupies a multiple of 256 bytes, both on the device and in the staging buffer.
	const PxU64 paddedByteSize = (byteSize + 255) & ~255;
	mPinnedHostMemoryRequirements += paddedByteSize;

	const PxU32 geometryIdx = mFreeGeometryIndices.getFreeIndex();
	void* const deviceMem = mDeviceMemoryAllocator->allocate(paddedByteSize, PxsHeapStats::eNARROWPHASE, PX_FL);

	// The new request is appended, so its position is the current size of the pending list.
	const PxU32 pendingCopyIdx = mScheduledCopies.size();

	// Destination and size are known now; the source is filled in by scheduleCopyHtoD().
	PxgCopyManager::CopyDesc copyDesc;
	copyDesc.dest = reinterpret_cast<size_t>(deviceMem);
	copyDesc.bytes = static_cast<size_t>(paddedByteSize);

	ScheduledCopyData pendingCopy;
	pendingCopy.mHullOrTrimeshIdx = geometryIdx;
	pendingCopy.mGeometryPtr = geomPtr;
	pendingCopy.mType = type;
	pendingCopy.mNumPolyVertices = numPolyVertices;
	pendingCopy.mCopyDesc = copyDesc;
	mScheduledCopies.pushBack(pendingCopy);

	// Remember where the pending copy lives so removeGeometry() can cancel it.
	HullOrMeshData geometry;
	geometry.mDeviceMemPointer = deviceMem;
	geometry.mCopyDescIndex = pendingCopyIdx;

	PX_ASSERT(geometryIdx < mGeometryData.size());
	mGeometryData[geometryIdx] = geometry;

	return geometryIdx;
}
|
||
|
|
|
||
|
|
void PxgGeometryManager::addBoxHull()
|
||
|
|
{
|
||
|
|
if (mBoxHullIdx != 0xFFffFFff)
|
||
|
|
return;
|
||
|
|
|
||
|
|
PxU64 byteSize = computeBoxHullByteSize();
|
||
|
|
|
||
|
|
mBoxHullIdx = addGeometryInternal(byteSize, NULL, UploadGeometryType::eBOXHULL);
|
||
|
|
}
|
||
|
|
|
||
|
|
// Registers a convex hull for upload and returns its geometry index.
PxU32 PxgGeometryManager::addHull(const ConvexHullData& hull)
{
	// The polygon vertex count is computed once here and reused when the hull is laid out.
	PxU32 polyVertexCount = 0;
	const PxU64 hullBytes = computeHullByteSize(hull, polyVertexCount);

	return addGeometryInternal(hullBytes, &hull, UploadGeometryType::eCONVEXHULL, polyVertexCount);
}
|
||
|
|
|
||
|
|
// Releases everything associated with geometry slot 'idx':
//  - a still-pending host-to-device copy (geometry removed before it was ever uploaded),
//  - the SDF CUDA arrays / texture objects registered for its device pointer, if any,
//  - its device memory and the slot index itself.
void PxgGeometryManager::removeGeometry(PxU32 idx)
{
	PX_ASSERT(idx < mGeometryData.size());
	HullOrMeshData geometryToRemove = mGeometryData[idx];

	const PxU32 scheduledCopyIndex = geometryToRemove.mCopyDescIndex;

	if (scheduledCopyIndex != HullOrMeshData::INVALID_COPY_DESC_INDEX)
	{
		PX_ASSERT(scheduledCopyIndex < mScheduledCopies.size());

		// The cancelled copy no longer needs staging space. Without this, the next scheduleCopyHtoD()
		// would allocate pinned memory for a geometry that is never laid out.
		const PxU64 cancelledBytes = mScheduledCopies[scheduledCopyIndex].mCopyDesc.bytes;
		PX_ASSERT(mPinnedHostMemoryRequirements >= cancelledBytes);
		mPinnedHostMemoryRequirements -= cancelledBytes;

		const PxU32 lastScheduledCopyHullOrTrimeshIndex = mScheduledCopies.back().mHullOrTrimeshIdx;

		if (lastScheduledCopyHullOrTrimeshIndex != idx)
		{
			// Swap-remove: the last pending copy moves into the freed position, so the geometry it
			// belongs to has to be told about its new position.
			mScheduledCopies.replaceWithLast(scheduledCopyIndex);

			PX_ASSERT(lastScheduledCopyHullOrTrimeshIndex < mGeometryData.size());
			mGeometryData[lastScheduledCopyHullOrTrimeshIndex].mCopyDescIndex = scheduledCopyIndex;
		}
		else
		{
			// The pending copy of this geometry is the last one: just drop it.
			mScheduledCopies.popBack();
		}
	}

	// Textures are keyed by the device pointer of the mesh they belong to.
	const PxPair<void*const, PxgMeshTextureData>* pair = mMeshToTextureMap.find(geometryToRemove.mDeviceMemPointer);

	if (pair)
	{
		cuArrayDestroy(pair->second.cuArray);
		cuTexObjectDestroy(pair->second.cuTexRef);

		// The subgrid texture only exists for sparse SDFs.
		if (pair->second.cuArraySubgrids)
		{
			cuArrayDestroy(pair->second.cuArraySubgrids);
			cuTexObjectDestroy(pair->second.cuTexRefSubgrids);
		}

		mMeshToTextureMap.erase(geometryToRemove.mDeviceMemPointer);
	}

	mDeviceMemoryAllocator->deallocate(geometryToRemove.mDeviceMemPointer);
	mFreeGeometryIndices.setFreeIndex(idx);
}
|
||
|
|
|
||
|
|
// Lays out every pending geometry back to back in one newly allocated pinned buffer and passes the
// resulting host-to-device copies to 'copyMan'. Afterwards the pending list is empty and the
// pinned-memory requirement is reset to zero. The pinned buffer itself is kept until
// resetAfterMemcpyCompleted() (or the destructor) releases it.
void PxgGeometryManager::scheduleCopyHtoD(PxgCopyManager& copyMan, PxCudaContext& cudaContext, CUstream stream)
{
	// One staging allocation sized for all pending geometries.
	mPinnedMemoryBasePtr = mPinnedMemoryAllocator->allocate(mPinnedHostMemoryRequirements, PxsHeapStats::eNARROWPHASE, PX_FL);

	PxU8* stagingCursor = reinterpret_cast<PxU8*>(mPinnedMemoryBasePtr);

	PxArray<ScheduledCopyData>::Iterator pending = mScheduledCopies.begin();
	const PxArray<ScheduledCopyData>::Iterator pendingEnd = mScheduledCopies.end();

	for (; pending != pendingEnd; ++pending)
	{
		PxU8* const staging = stagingCursor;
		const PxU64 stagingBytes = pending->mCopyDesc.bytes;

		// Sizes were padded to 256 bytes in addGeometryInternal(), so each chunk starts 256-byte aligned.
		PX_ASSERT((stagingBytes & 255) == 0);
		PX_ASSERT((reinterpret_cast<size_t>(staging) & 255) == 0);

		// Write the geometry into the staging chunk with the routine matching its type.
		switch (pending->mType)
		{
			case UploadGeometryType::eCONVEXHULL:
			{
				const ConvexHullData& hull = *reinterpret_cast<const ConvexHullData*>(pending->mGeometryPtr);
				layOutHull(staging, hull, pending->mNumPolyVertices);
				break;
			}
			case UploadGeometryType::eTRIANGLEMESH:
			{
				const TriangleMesh& mesh = *reinterpret_cast<const TriangleMesh*>(pending->mGeometryPtr);
				PxgMeshTextureData sdfTextures = layOutTriMesh(staging, mesh, stream);

				// Only meshes with an SDF produce textures; remember them for removeGeometry().
				if (sdfTextures.cuArray)
				{
					PX_ASSERT(pending->mHullOrTrimeshIdx < mGeometryData.size());
					mMeshToTextureMap.insert(mGeometryData[pending->mHullOrTrimeshIdx].mDeviceMemPointer, sdfTextures);
				}
				break;
			}
			case UploadGeometryType::eHEIGHTFIELD:
			{
				const HeightFieldData& heightfield = *reinterpret_cast<const HeightFieldData*>(pending->mGeometryPtr);
				layOutHeightfield(staging, heightfield);
				break;
			}
			case UploadGeometryType::eBOXHULL:
			{
				layOutBoxHull(staging);
				break;
			}
			default:
			{
				PX_ALWAYS_ASSERT();
				break;
			}
		}

		// The copy reads from the device-visible address of the staging chunk.
		pending->mCopyDesc.source = reinterpret_cast<size_t>(getMappedDevicePtr(&cudaContext, staging));

		// Once handed to the copy manager, the copy can no longer be cancelled by removeGeometry().
		PX_ASSERT(pending->mHullOrTrimeshIdx < mGeometryData.size());
		mGeometryData[pending->mHullOrTrimeshIdx].mCopyDescIndex = HullOrMeshData::INVALID_COPY_DESC_INDEX;
		copyMan.pushDeferredHtoD(pending->mCopyDesc);

		stagingCursor += stagingBytes;
	}

	mPinnedHostMemoryRequirements = 0;
	mScheduledCopies.forceSize_Unsafe(0);
}
|
||
|
|
|
||
|
|
void PxgGeometryManager::resetAfterMemcpyCompleted()
|
||
|
|
{
|
||
|
|
// AD: we basically never send geometry each frame, and they're usually sized differently anyway, so let's give this memory back
|
||
|
|
// to keep pinned memory usage under control.
|
||
|
|
if (mPinnedMemoryBasePtr)
|
||
|
|
{
|
||
|
|
mPinnedMemoryAllocator->deallocate(mPinnedMemoryBasePtr);
|
||
|
|
mPinnedMemoryBasePtr = NULL;
|
||
|
|
}
|
||
|
|
|
||
|
|
mFreeGeometryIndices.releaseFreeIndices();
|
||
|
|
}
|
||
|
|
|
||
|
|
// Returns the device address of the geometry stored in slot 'idx'.
CUdeviceptr PxgGeometryManager::getGeometryDevPtrByIndex(PxU32 idx) const
{
	PX_ASSERT(idx < mGeometryData.size());

	void* const deviceMem = mGeometryData[idx].mDeviceMemPointer;
	return reinterpret_cast<CUdeviceptr>(deviceMem);
}
|
||
|
|
|
||
|
|
// Returns the device address of the shared box hull. addBoxHull() must have been called before.
CUdeviceptr PxgGeometryManager::getBoxHullDevPtr() const
{
	PX_ASSERT(mBoxHullIdx != 0xFFFFFFFF);
	PX_ASSERT(mBoxHullIdx < mGeometryData.size());

	void* const boxHullMem = mGeometryData[mBoxHullIdx].mDeviceMemPointer;
	return reinterpret_cast<CUdeviceptr>(boxHullMem);
}
|
||
|
|
|
||
|
|
// Registers a triangle mesh for upload and returns its geometry index.
PxU32 PxgGeometryManager::addTriMesh(const TriangleMesh& triMesh)
{
	const PxU64 meshBytes = computeTriMeshByteSize(triMesh);
	return addGeometryInternal(meshBytes, &triMesh, UploadGeometryType::eTRIANGLEMESH);
}
|
||
|
|
|
||
|
|
// Registers a heightfield for upload and returns its geometry index.
PxU32 PxgGeometryManager::addHeightfield(const HeightFieldData& hf)
{
	const PxU64 heightfieldBytes = computeHeightfieldByteSize(hf);
	return addGeometryInternal(heightfieldBytes, &hf, UploadGeometryType::eHEIGHTFIELD);
}
|
||
|
|
|
||
|
|
|
||
|
|
// static functions
|
||
|
|
// Size in bytes of the box hull blob written by layOutBoxHull(). The fields are listed in the
// order in which they are laid out.
static PxU64 computeBoxHullByteSize()
{
	// Fixed topology of a box.
	const PxU32 nbVertices = 8;
	const PxU32 nbPolygons = 6;
	const PxU32 nbEdges = 12;

	PxU64 total = 0;

	total += sizeof(float4);						// center of mass
	total += sizeof(PxU32);							// NbEdgesNbHullVerticesNbPolygons
	total += 3 * sizeof(PxU32);						// pad
	total += sizeof(float4);						// extents
	total += sizeof(float4) * nbVertices;			// vertices
	total += sizeof(float4) * nbPolygons;			// planes
	total += sizeof(PxU32) * nbPolygons;			// vRef8NbVertsMinIndex0
	total += sizeof(PxU16) * 2 * nbEdges;			// mVerticesByEdges16
	total += sizeof(PxU8) * 2 * nbEdges;			// mFacesByEdges8
	total += sizeof(PxU8) * 3 * nbVertices;			// mFacesByVertices8
	total += sizeof(PxU8) * 24;						// vertexData8 (6 polygons * 4 vertices)

	return total;
}
|
||
|
|
|
||
|
|
// Size in bytes of the convex hull blob written by layOutHull().
// numPolyVertices (out) receives the sum of the vertex counts of all polygons of the hull.
static PxU64 computeHullByteSize(const ConvexHullData& hull, PxU32& numPolyVertices)
{
	// The per-polygon vertex index lists are stored back to back, so their total length is needed.
	numPolyVertices = 0;

	const HullPolygonData* polygon = hull.mPolygons;
	const HullPolygonData* const polygonEnd = hull.mPolygons + hull.mNbPolygons;
	for (; polygon < polygonEnd; ++polygon)
		numPolyVertices += polygon->mNbVerts;

	PxU64 total = 0;

	total += sizeof(float4);							// center of mass
	total += sizeof(PxU32);								// NbEdgesNbHullVerticesNbPolygons
	total += 3 * sizeof(PxU32);							// pad
	total += sizeof(float4);							// extents
	total += sizeof(float4) * hull.mNbHullVertices;		// vertices
	total += sizeof(float4) * hull.mNbPolygons;			// planes
	total += sizeof(PxU32) * hull.mNbPolygons;			// vRef8NbVertsMinIndex0
	total += sizeof(PxU16) * 2 * hull.mNbEdges;			// mVerticesByEdges16
	total += sizeof(PxU8) * 2 * hull.mNbEdges;			// mFacesByEdges8
	total += sizeof(PxU8) * 3 * hull.mNbHullVertices;	// mFacesByVertices8
	total += sizeof(PxU8) * numPolyVertices;			// vertexData8

	return total;
}
|
||
|
|
|
||
|
|
// Size in bytes of the triangle mesh blob written by layOutTriMesh(): the mesh tables, the SDF
// header (plus SDF parameters when the mesh has an SDF) and the packed BV32 nodes.
static PxU64 computeTriMeshByteSize(const TriangleMesh& triMesh)
{
	const PxU32 nbTriangles = triMesh.getNbTrianglesFast();
	const PxU32 nbVertices = triMesh.getNbVerticesFast();

	PxU64 total = 0;

	total += sizeof(uint4);							// (nbVerts, nbTris, meshAdjVerticiesTotal, nbBv32TreeNodes)
	total += sizeof(float4) * nbVertices;			// meshVerts
	total += sizeof(uint4) * nbTriangles;			// meshTriIndices
	total += sizeof(uint4) * nbTriangles;			// meshTriAdjacencies
	total += sizeof(PxU32) * nbTriangles;			// gpu to cpu remap table
	total += sizeof(PxU32) * nbTriangles * 3;		// vertex to triangle remap table
	total += sizeof(PxU32) * (nbVertices + 1);		// vertex to triangle offset table

	// The SDF section starts on a 16-byte boundary.
	total = (total + 15) & ~15;

	total += sizeof(uint4);							// (sdfDimX, sdfDimY, sdfDimZ, 0)

	const SDF& sdf = triMesh.getSdfDataFast();
	if (sdf.mNumSdfs > 0)
	{
		total += sizeof(float4);					// (meshLower.x, meshLower.y, meshLower.z, spacing)
		total += sizeof(uint4);						// sparse SDF parameters, see layOutTriMesh()
		total += sizeof(CUtexObject);				// SDF texture object reference
		total += sizeof(CUtexObject);				// subgrid texture object reference
		total += sizeof(PxU32) * sdf.mNumStartSlots;	// subgrid start slots
	}

	// Keep the total a multiple of 16 bytes before the BV32 nodes are added.
	total = (total + 15) & ~15;

	//ML: don't know whether we need to have local bound
	const PxU64 bv32Bytes = triMesh.mGRB_BV32Tree->mNbPackedNodes * sizeof(BV32DataPacked);

	return total + bv32Bytes;
}
|
||
|
|
|
||
|
|
// Size in bytes of the heightfield blob written by layOutHeightfield().
//
// Each heightfield sample is 32 bits wide:
//   1) a 16 bit signed height value,
//   2) two one-byte material indices whose high bit is reserved for special use (so a material
//      index has only 7 bits): the high bit of material0 is the tess-flag, the high bit of
//      material1 is reserved for future use.
static PxU64 computeHeightfieldByteSize(const HeightFieldData& hf)
{
	PxU64 total = 0;

	total += sizeof(PxU32);							// rows
	total += sizeof(PxU32);							// columns
	total += sizeof(PxU32) * hf.columns * hf.rows;	// samples
	total += sizeof(PxU16);							// PxHeightFieldFlags

	return total;
}
|
||
|
|
|
||
|
|
static void layOutBoxHull(void* mem)
|
||
|
|
{
|
||
|
|
const float4 vertices[8] = { make_float4(-1.f, 1.f, -1.f, 0.f),
|
||
|
|
make_float4(1.f, 1.f, -1.f, 0.f),
|
||
|
|
make_float4(1.f, -1.f, -1.f, 0.f),
|
||
|
|
make_float4(-1.f, -1.f, -1.f, 0.f),
|
||
|
|
make_float4(-1.f, -1.f, 1.f, 0.f),
|
||
|
|
make_float4(-1.f, 1.f, 1.f, 0.f),
|
||
|
|
make_float4(1.f, -1.f, 1.f, 0.f),
|
||
|
|
make_float4(1.f, 1.f, 1.f, 0.f)
|
||
|
|
};
|
||
|
|
|
||
|
|
const float4 planes[6] = {make_float4(0.f, 0.0f, -1.0f, -1.0f),
|
||
|
|
make_float4(-1.0f, 0.0f, 0.0f, -1.0f),
|
||
|
|
make_float4(0.0f, -1.0f, 0.0f, -1.0f),
|
||
|
|
make_float4(0.0f, 1.0f, 0.0f, -1.0f),
|
||
|
|
make_float4(0.0f, 0.0f, 1.0f, -1.0f),
|
||
|
|
make_float4(1.0f, 0.0f, 0.0f, -1.0f)};
|
||
|
|
|
||
|
|
const PxU32 polyData[6] = {merge(0, 4, 4),
|
||
|
|
merge(4, 4, 1),
|
||
|
|
merge(8, 4, 0),
|
||
|
|
merge(12, 4, 2),
|
||
|
|
merge(16, 4, 0),
|
||
|
|
merge(20, 4, 0)};
|
||
|
|
|
||
|
|
const PxU16 vertsByEdges[24] = { 0, 1, 3, 0, 5, 0, 1, 2, 7, 1, 2, 3, 2, 6, 3, 4, 4, 5, 6, 4, 5, 7, 6, 7 };
|
||
|
|
|
||
|
|
const PxU8 facesByEdges[24] = {
|
||
|
|
0, 3,
|
||
|
|
0, 1,
|
||
|
|
1, 3,
|
||
|
|
0, 5,
|
||
|
|
|
||
|
|
3, 5,
|
||
|
|
0, 2,
|
||
|
|
2, 5,
|
||
|
|
1, 2,
|
||
|
|
|
||
|
|
1, 4,
|
||
|
|
2, 4,
|
||
|
|
3, 4,
|
||
|
|
4, 5
|
||
|
|
};
|
||
|
|
|
||
|
|
const PxU8 facesByVerts[24] = {
|
||
|
|
0, 1, 3,
|
||
|
|
0, 3, 5,
|
||
|
|
0, 2, 5,
|
||
|
|
0, 1, 2,
|
||
|
|
|
||
|
|
1, 2, 4,
|
||
|
|
1, 3, 4,
|
||
|
|
2, 4, 5,
|
||
|
|
3, 4, 5
|
||
|
|
};
|
||
|
|
|
||
|
|
const PxU8 polyIndices[24] = {0, 1, 2, 3,
|
||
|
|
4, 5, 0, 3,
|
||
|
|
4, 3, 2, 6,
|
||
|
|
7, 1, 0, 5,
|
||
|
|
7, 5, 4, 6,
|
||
|
|
7, 6, 2, 1
|
||
|
|
};
|
||
|
|
|
||
|
|
PxU8* m = reinterpret_cast<PxU8*>(mem);
|
||
|
|
*((float4*)m) = make_float4(0.0f, 0.0f, 0.0f, 0.0f);//center of mass
|
||
|
|
m += sizeof(float4);
|
||
|
|
*((PxU32*)m) = merge(12, merge((PxU8) 8, (PxU8) 6));
|
||
|
|
m += 4 * sizeof(PxU32);
|
||
|
|
*((float4*)m) = make_float4(1.0f, //half-extents and the insphere radius
|
||
|
|
1.0f,
|
||
|
|
1.0f,
|
||
|
|
1.0f);
|
||
|
|
|
||
|
|
m += sizeof(float4);
|
||
|
|
PxMemCopy(m, vertices, sizeof(float4) * 8);
|
||
|
|
m += sizeof(float4) * 8;
|
||
|
|
PxMemCopy(m, planes, sizeof(float4) * 6);
|
||
|
|
m += sizeof(float4) * 6;
|
||
|
|
PxMemCopy(m, polyData, sizeof(PxU32) * 6);
|
||
|
|
m += sizeof(PxU32) * 6;
|
||
|
|
PxMemCopy(m, vertsByEdges, sizeof(PxU16) * 2 * 12);
|
||
|
|
m += sizeof(PxU16) * 2 * 12;
|
||
|
|
PxMemCopy(m, facesByEdges, sizeof(PxU8) * 2 * 12);
|
||
|
|
m += sizeof(PxU8) * 2 * 12;
|
||
|
|
PxMemCopy(m, facesByVerts, sizeof(PxU8) * 3 * 8);
|
||
|
|
m += sizeof(PxU8) * 3 * 8;
|
||
|
|
PxMemCopy(m, polyIndices, sizeof(PxU8) * 24);
|
||
|
|
}
|
||
|
|
|
||
|
|
static void layOutHull(void* mem, const ConvexHullData& hull, PxU32 numPolyVertices)
|
||
|
|
{
|
||
|
|
PxU8* m = (PxU8*) mem;
|
||
|
|
*((float4*)m) = make_float4(hull.mCenterOfMass.x, hull.mCenterOfMass.y, hull.mCenterOfMass.z, 0.f);
|
||
|
|
m +=sizeof(float4);
|
||
|
|
*((PxU32*)m) = merge(hull.mNbEdges, merge(hull.mNbHullVertices, hull.mNbPolygons));
|
||
|
|
m += 4 * sizeof(PxU32);
|
||
|
|
*((float4*)m) = make_float4(hull.mInternal.mInternalExtents.x,
|
||
|
|
hull.mInternal.mInternalExtents.y,
|
||
|
|
hull.mInternal.mInternalExtents.z,
|
||
|
|
hull.mInternal.mInternalRadius);
|
||
|
|
|
||
|
|
m += sizeof(float4);
|
||
|
|
|
||
|
|
const PxVec3* verts = hull.getHullVertices();
|
||
|
|
|
||
|
|
for (const PxVec3* end = verts + hull.mNbHullVertices; verts < end; ++verts)
|
||
|
|
{
|
||
|
|
*((float4*)m) = make_float4(verts->x, verts->y, verts->z, 0);
|
||
|
|
m += sizeof(float4);
|
||
|
|
}
|
||
|
|
|
||
|
|
const HullPolygonData* polys = hull.mPolygons;
|
||
|
|
float4* planes = (float4*)m;
|
||
|
|
PxU32* vRef8NbVertsMinIndex = (PxU32*) (m + sizeof(float4) * hull.mNbPolygons);
|
||
|
|
|
||
|
|
for (const HullPolygonData* end = polys + hull.mNbPolygons; polys < end; ++polys)
|
||
|
|
{
|
||
|
|
*(planes++) = make_float4(polys->mPlane.n.x, polys->mPlane.n.y, polys->mPlane.n.z, polys->mPlane.d);
|
||
|
|
*(vRef8NbVertsMinIndex++) = merge(polys->mVRef8, polys->mNbVerts, polys->mMinIndex);
|
||
|
|
}
|
||
|
|
|
||
|
|
m += (sizeof(float4) + sizeof(PxU32)) * hull.mNbPolygons;
|
||
|
|
|
||
|
|
PxMemCopy(m, hull.getVerticesByEdges16(), sizeof(PxU16) * 2*hull.mNbEdges);
|
||
|
|
m += sizeof(PxU16) * 2*hull.mNbEdges;
|
||
|
|
|
||
|
|
PxMemCopy(m, hull.getFacesByEdges8(), sizeof(PxU8) * 2*hull.mNbEdges);
|
||
|
|
m += sizeof(PxU8) * 2*hull.mNbEdges;
|
||
|
|
|
||
|
|
PxMemCopy(m, hull.getFacesByVertices8(), sizeof(PxU8) * 3*hull.mNbHullVertices);
|
||
|
|
m += sizeof(PxU8) * 3*hull.mNbHullVertices;
|
||
|
|
|
||
|
|
PxMemCopy(m, hull.getVertexData8(), sizeof(PxU8) * numPolyVertices);
|
||
|
|
//m += sizeof(PxU8) * numPolyVertices;
|
||
|
|
}
|
||
|
|
|
||
|
|
// Writes 'triMesh' into 'mem' in this order:
//   uint4 header (nbVerts, nbTris, 0, nbBv32PackedNodes)
//   BV32 packed nodes
//   vertices (float4), triangle indices (uint4), triangle adjacencies (uint4)
//   GPU-to-CPU face remap, vertex-to-triangle offsets (+ total reference count),
//   vertex-to-triangle references
//   [padding up to the next 16-byte boundary]
//   uint4 (sdfDimX, sdfDimY, sdfDimZ, preferSDFProjection)
//   only if the mesh has an SDF: (meshLower, spacing), (subgridSize, min, max, nbStartSlots),
//   SDF texture object, subgrid texture object, subgrid start slots
// 'mem' must provide at least computeTriMeshByteSize(triMesh) bytes.
//
// The SDF textures are created here, with their uploads issued on 'stream'. The returned struct
// holds the created CUDA arrays and texture objects so they can be destroyed later; it is all
// zero when the mesh has no SDF.
static PxgMeshTextureData layOutTriMesh(void* mem, const TriangleMesh& triMesh, CUstream stream)
{
	const PxU32 nbTriangles = triMesh.getNbTrianglesFast();
	const PxU32 nbVertices = triMesh.getNbVerticesFast();

	BV32Tree* tree = triMesh.mGRB_BV32Tree;

	PxU8* cursor = reinterpret_cast<PxU8*>(mem);

	// Header
	*reinterpret_cast<uint4*>(cursor) = make_uint4(triMesh.getNbVerticesFast(), triMesh.getNbTrianglesFast(), 0, tree->mNbPackedNodes);
	cursor += sizeof(uint4);

	// Midphase
	PxMemCopy(cursor, tree->mPackedNodes, sizeof(BV32DataPacked) * tree->mNbPackedNodes);
	cursor += sizeof(BV32DataPacked) * tree->mNbPackedNodes;

	PX_ASSERT(tree->mNbPackedNodes > 0);

	// Core mesh data: vertices widened from PxVec3 to float4
	{
		float4* dstVertices = reinterpret_cast<float4*>(cursor);
		const PxVec3* srcVertices = triMesh.getVerticesFast();

		for (PxU32 v = 0; v < nbVertices; ++v)
		{
			const PxVec3& src = srcVertices[v];
			float4& dst = dstVertices[v];
			dst.x = src.x;
			dst.y = src.y;
			dst.z = src.z;
			dst.w = 0.f;
		}

		cursor += nbVertices * sizeof(float4);
	}

	// Triangle indices widened to uint4, from either 16-bit or 32-bit source indices
	{
		uint4* dstIndices = reinterpret_cast<uint4*>(cursor);

		if (triMesh.has16BitIndices())
		{
			const PxU16* srcIndices = reinterpret_cast<PxU16*>(triMesh.mGRB_triIndices);
			for (PxU32 t = 0; t < nbTriangles; ++t)
			{
				const PxU16* tri = srcIndices + 3 * t;
				dstIndices[t].x = tri[0];
				dstIndices[t].y = tri[1];
				dstIndices[t].z = tri[2];
				dstIndices[t].w = 0;
			}
		}
		else
		{
			const PxU32* srcIndices = reinterpret_cast<PxU32*>(triMesh.mGRB_triIndices);
			for (PxU32 t = 0; t < nbTriangles; ++t)
			{
				const PxU32* tri = srcIndices + 3 * t;
				dstIndices[t].x = tri[0];
				dstIndices[t].y = tri[1];
				dstIndices[t].z = tri[2];
				dstIndices[t].w = 0;
			}
		}

		cursor += nbTriangles * sizeof(uint4);
	}

	// Adjacency data for contact gen
	PxMemCopy(cursor, triMesh.mGRB_triAdjacencies, nbTriangles * sizeof(uint4));
	cursor += nbTriangles * sizeof(uint4);

	// GPU to CPU remap table
	PxMemCopy(cursor, triMesh.mGRB_faceRemap, nbTriangles * sizeof(PxU32));
	cursor += nbTriangles * sizeof(PxU32);

	// Vertex to triangle offset table; the extra last entry holds the total number of references
	PxMemCopy(cursor, triMesh.mAccumulatedTrianglesRef, nbVertices * sizeof(PxU32));
	reinterpret_cast<PxU32*>(cursor)[nbVertices] = triMesh.mNbTrianglesReferences;
	cursor += (nbVertices + 1) * sizeof(PxU32);

	// Vertex to triangle reference table
	PxMemCopy(cursor, triMesh.mTrianglesReferences, (nbTriangles*3) * sizeof(PxU32));
	cursor += (nbTriangles * 3) * sizeof(PxU32);

	// The SDF section starts on a 16-byte boundary
	cursor = reinterpret_cast<PxU8*>((reinterpret_cast<size_t>(cursor) + 15) & ~15);

	// SDF header; always written, even for meshes without an SDF
	const SDF& sdf = triMesh.getSdfDataFast();

	*reinterpret_cast<uint4*>(cursor) = make_uint4(sdf.mDims.x, sdf.mDims.y, sdf.mDims.z, triMesh.getPreferSDFProjection() ? 1 : 0);
	cursor += sizeof(uint4);

	PxgMeshTextureData result;
	PxMemZero(&result, sizeof(result));

	// No SDF: nothing more to write and no textures to create
	if (!sdf.mNumSdfs)
		return result;

	const PxVec3 meshLower = sdf.mMeshLower;
	const PxReal spacing = sdf.mSpacing;
	*reinterpret_cast<float4*>(cursor) = make_float4(meshLower.x, meshLower.y, meshLower.z, spacing);
	cursor += sizeof(float4);

	// The min/max SDF values are stored as their raw 32-bit patterns
	*reinterpret_cast<uint4*>(cursor) = make_uint4(sdf.mSubgridSize, reinterpret_cast<const PxU32&>(sdf.mSubgridsMinSdfValue), reinterpret_cast<const PxU32&>(sdf.mSubgridsMaxSdfValue), sdf.mNumStartSlots);
	cursor += sizeof(uint4);

	// Slots inside the blob that receive the texture object handles
	CUtexObject* sdfTextureSlot = reinterpret_cast<CUtexObject*>(cursor);
	cursor += sizeof(CUtexObject);

	CUtexObject* subgridTextureSlot = reinterpret_cast<CUtexObject*>(cursor);
	cursor += sizeof(CUtexObject);

	// Space for the start slots is reserved in both cases, but only a sparse SDF has data for it
	if (sdf.mSubgridSize > 0)
		PxMemCopy(cursor, sdf.mSubgridStartSlots, sdf.mNumStartSlots * sizeof(PxU32));
	cursor += sizeof(PxU32) * sdf.mNumStartSlots;

	CUarray sdfArray;
	CUarray subgridArray = 0;

	if (sdf.mSubgridSize == 0)
	{
		// Dense SDF: a single texture covering the whole grid
		createTextureObject<PxReal>(CU_AD_FORMAT_FLOAT, sdfTextureSlot, sdfArray, sdf.mDims.x, sdf.mDims.y, sdf.mDims.z, sdf.mSdf, stream);
	}
	else
	{
		// Sparse SDF: a coarse texture with one sample per subgrid corner...
		const PxU32 coarseX = sdf.mDims.x / sdf.mSubgridSize;
		const PxU32 coarseY = sdf.mDims.y / sdf.mSubgridSize;
		const PxU32 coarseZ = sdf.mDims.z / sdf.mSubgridSize;

		createTextureObject<PxReal>(CU_AD_FORMAT_FLOAT, sdfTextureSlot, sdfArray, coarseX + 1, coarseY + 1, coarseZ + 1, sdf.mSdf, stream);

		// ...plus one texture holding all subgrid blocks, in the pixel format the SDF was built with
		const PxU32 subgridTexWidth = sdf.mSdfSubgrids3DTexBlockDim.x * (sdf.mSubgridSize + 1);
		const PxU32 subgridTexHeight = sdf.mSdfSubgrids3DTexBlockDim.y * (sdf.mSubgridSize + 1);
		const PxU32 subgridTexDepth = sdf.mSdfSubgrids3DTexBlockDim.z * (sdf.mSubgridSize + 1);

		switch(sdf.mBytesPerSparsePixel)
		{
		case 1:
			createTextureObject<PxU8>(CU_AD_FORMAT_UNSIGNED_INT8, subgridTextureSlot, subgridArray,
				subgridTexWidth, subgridTexHeight, subgridTexDepth, sdf.mSubgridSdf, stream);
			break;
		case 2:
			createTextureObject<PxU16>(CU_AD_FORMAT_UNSIGNED_INT16, subgridTextureSlot, subgridArray,
				subgridTexWidth, subgridTexHeight, subgridTexDepth, reinterpret_cast<PxU16*>(sdf.mSubgridSdf), stream);
			break;
		case 4:
			createTextureObject<PxReal>(CU_AD_FORMAT_FLOAT, subgridTextureSlot, subgridArray,
				subgridTexWidth, subgridTexHeight, subgridTexDepth, reinterpret_cast<PxReal*>(sdf.mSubgridSdf), stream);
			break;
		}
	}

	result.cuArray = sdfArray;
	result.cuTexRef = *sdfTextureSlot;

	// subgridArray is still 0 for a dense SDF and when no subgrid texture was created
	if (subgridArray)
	{
		result.cuArraySubgrids = subgridArray;
		result.cuTexRefSubgrids = *subgridTextureSlot;
	}
	else
	{
		result.cuArraySubgrids = 0;
		result.cuTexRefSubgrids = 0;
	}

	return result;
}
|
||
|
|
|
||
|
|
static void layOutHeightfield(void* mem, const HeightFieldData& hf)
|
||
|
|
{
|
||
|
|
PxU8* m = (PxU8*) mem;
|
||
|
|
|
||
|
|
PxU32 nbRows = hf.rows;
|
||
|
|
PxU32 nbCols = hf.columns;
|
||
|
|
|
||
|
|
*((PxU32* ) m) = nbRows;
|
||
|
|
m += sizeof(PxU32);
|
||
|
|
|
||
|
|
*((PxU32* ) m) = nbCols;
|
||
|
|
m += sizeof(PxU32);
|
||
|
|
|
||
|
|
PX_ASSERT(sizeof(PxU32) == sizeof(PxHeightFieldSample));
|
||
|
|
PxMemCopy(m, hf.samples, sizeof(PxU32) * nbRows * nbCols);
|
||
|
|
|
||
|
|
m += sizeof(PxU32) * nbRows * nbCols;
|
||
|
|
|
||
|
|
*((PxU16*)m) = hf.flags;
|
||
|
|
}
|
||
|
|
|
||
|
|
template<typename T>
|
||
|
|
static void createTextureObject(CUarray_format format, CUtexObject*& texture, CUarray& cuArray, PxU32 width, PxU32 height, PxU32 depth, const T* data, CUstream stream)
|
||
|
|
{
|
||
|
|
if (width == 0 || height == 0 || depth == 0)
|
||
|
|
{
|
||
|
|
texture = 0;
|
||
|
|
cuArray = 0;
|
||
|
|
return;
|
||
|
|
}
|
||
|
|
|
||
|
|
CUDA_ARRAY3D_DESCRIPTOR_st arrDesc;
|
||
|
|
arrDesc.Format = format;
|
||
|
|
arrDesc.NumChannels = 1;
|
||
|
|
arrDesc.Width = width;
|
||
|
|
arrDesc.Height = height;
|
||
|
|
arrDesc.Depth = depth;
|
||
|
|
arrDesc.Flags = 0;
|
||
|
|
CUresult r = cuArray3DCreate(&cuArray, &arrDesc);
|
||
|
|
PX_UNUSED(r);
|
||
|
|
PX_ASSERT(r == CUDA_SUCCESS);
|
||
|
|
|
||
|
|
CUDA_MEMCPY3D copyParam;
|
||
|
|
PxMemZero(©Param, sizeof(copyParam));
|
||
|
|
copyParam.dstMemoryType = CU_MEMORYTYPE_ARRAY;
|
||
|
|
copyParam.dstArray = cuArray;
|
||
|
|
copyParam.srcMemoryType = CU_MEMORYTYPE_HOST;
|
||
|
|
copyParam.srcHost = data;
|
||
|
|
copyParam.srcPitch = width * sizeof(T);
|
||
|
|
copyParam.srcHeight = height;
|
||
|
|
copyParam.WidthInBytes = copyParam.srcPitch;
|
||
|
|
copyParam.Height = height;
|
||
|
|
copyParam.Depth = depth;
|
||
|
|
|
||
|
|
cuMemcpy3DAsync(©Param, stream);
|
||
|
|
|
||
|
|
CUDA_RESOURCE_DESC resDesc;
|
||
|
|
PxMemZero(&resDesc, sizeof(resDesc));
|
||
|
|
resDesc.resType = CU_RESOURCE_TYPE_ARRAY;
|
||
|
|
resDesc.res.array.hArray = cuArray;
|
||
|
|
|
||
|
|
CUDA_TEXTURE_DESC texDesc;
|
||
|
|
PxMemZero(&texDesc, sizeof(texDesc));
|
||
|
|
texDesc.addressMode[0] = CU_TR_ADDRESS_MODE_CLAMP;
|
||
|
|
texDesc.addressMode[1] = CU_TR_ADDRESS_MODE_CLAMP;
|
||
|
|
texDesc.addressMode[2] = CU_TR_ADDRESS_MODE_CLAMP;
|
||
|
|
|
||
|
|
texDesc.filterMode = CU_TR_FILTER_MODE_LINEAR;
|
||
|
|
|
||
|
|
r = cuTexObjectCreate(texture, &resDesc, &texDesc, NULL);
|
||
|
|
PX_ASSERT(r == CUDA_SUCCESS);
|
||
|
|
}
|