// Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions // are met: // * Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // * Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // * Neither the name of NVIDIA CORPORATION nor the names of its // contributors may be used to endorse or promote products derived // from this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved. // Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. // Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 
#include "PxgGeometryManager.h"

#include "PxsHeapMemoryAllocator.h"

#include "foundation/PxAssert.h"
#include "foundation/PxBasicTemplates.h"
#include "foundation/PxMemory.h"
#include "foundation/PxSimpleTypes.h"
#include "foundation/PxVec3.h"

#include "PxgCopyManager.h"
#include "PxgCudaUtils.h"
#include "PxgHeapMemAllocator.h"

#include "GuBV32.h"
#include "convex/GuConvexMesh.h"
#include "mesh/GuTriangleMesh.h"
#include "hf/GuHeightField.h"
#include "GuSDF.h"

#include "cudaNpCommon.h"

#include "cudamanager/PxCudaContextManager.h"
#include "cudamanager/PxCudaContext.h"

// NOTE(review): the two system header names were lost in extraction; these are the CUDA headers
// providing float4/uint4 and make_float4/make_uint4 - confirm against the original file.
#include <vector_types.h>
#include <vector_functions.h>

// AD: PxgGeometryManager manages the CPU/GPU data transfers and the lifetime of collision geometries: Convex Hulls, Trimeshes, SDFs, Heightfields.

using namespace physx;
using namespace Gu;

// forward declarations of static functions
static PxU64 computeBoxHullByteSize();
static PxU64 computeHullByteSize(const ConvexHullData& hull, PxU32& numPolyVertices);
static PxU64 computeTriMeshByteSize(const TriangleMesh& triMesh);
static PxU64 computeHeightfieldByteSize(const HeightFieldData& hf);

static void layOutBoxHull(void* mem);
static void layOutHull(void* mem, const ConvexHullData& hull, PxU32 numPolyVertices);
static PxgMeshTextureData layOutTriMesh(void* mem, const TriangleMesh& triMesh, CUstream stream);
static void layOutHeightfield(void* mem, const HeightFieldData& hf);

template<typename T>
static void createTextureObject(CUarray_format format, CUtexObject*& texture, CUarray& cuArray, PxU32 width, PxU32 height, PxU32 depth, const T* data, CUstream stream);

// PxgGeometryManager definitions

// Stores the device and pinned (mapped) allocators of the given heap memory manager.
// No geometry is registered yet: the box hull index starts out as the invalid marker 0xFFffFFff.
PxgGeometryManager::PxgGeometryManager(PxgHeapMemoryAllocatorManager* heapMemoryManager):
	mDeviceMemoryAllocator(heapMemoryManager->mDeviceMemoryAllocators),
	mPinnedMemoryAllocator(static_cast<PxgHeapMemoryAllocator*>(heapMemoryManager->mMappedMemoryAllocators)),
	mPinnedMemoryBasePtr(NULL),
	mPinnedHostMemoryRequirements(0),
	mFreeGeometryIndices(mGeometryData),
	mBoxHullIdx(0xFFffFFff)
{
}

// Releases the device memory of the shared box hull (if it was ever added) and any pinned
// staging buffer that is still allocated.
PxgGeometryManager::~PxgGeometryManager()
{
	if (mBoxHullIdx != 0xFFFFFFFF)
		mDeviceMemoryAllocator->deallocate(reinterpret_cast<void*>(getGeometryDevPtrByIndex(mBoxHullIdx)));

	if (mPinnedMemoryBasePtr)
	{
		mPinnedMemoryAllocator->deallocate(mPinnedMemoryBasePtr);
		mPinnedMemoryBasePtr = NULL;
	}
}

// Registers a geometry for upload.
// - byteSize: unpadded size of the GPU representation; it is rounded up to a multiple of 256 here.
// - geomPtr: CPU-side geometry that gets laid out in scheduleCopyHtoD (NULL for the box hull).
// - type: selects the layout routine used in scheduleCopyHtoD.
// - numPolyVertices: only meaningful for convex hulls.
// Allocates the device memory immediately, records a scheduled copy and returns the geometry index.
PxU32 PxgGeometryManager::addGeometryInternal(PxU64 byteSize, const void* geomPtr, UploadGeometryType::Enum type, PxU32 numPolyVertices /*= 0*/)
{
	// round up to 256 bytes; scheduleCopyHtoD asserts this for every staging slice
	byteSize = (byteSize + 255) & ~255;

	mPinnedHostMemoryRequirements += byteSize;

	PxU32 idx = mFreeGeometryIndices.getFreeIndex();

	void* devicePtr = mDeviceMemoryAllocator->allocate(byteSize, PxsHeapStats::eNARROWPHASE, PX_FL);

	PxU32 copyIndex = mScheduledCopies.size();

	ScheduledCopyData scheduledCopy;
	scheduledCopy.mHullOrTrimeshIdx = idx;
	scheduledCopy.mGeometryPtr = geomPtr;
	scheduledCopy.mType = type;
	scheduledCopy.mNumPolyVertices = numPolyVertices;

	// the source pointer is filled in later, once the pinned staging memory exists
	PxgCopyManager::CopyDesc desc;
	desc.dest = (size_t)devicePtr;
	desc.bytes = (size_t)byteSize;

	scheduledCopy.mCopyDesc = desc;
	mScheduledCopies.pushBack(scheduledCopy);

	HullOrMeshData newHullOrMesh;
	newHullOrMesh.mDeviceMemPointer = devicePtr;
	newHullOrMesh.mCopyDescIndex = copyIndex;

	PX_ASSERT(idx < mGeometryData.size());
	mGeometryData[idx] = newHullOrMesh;

	return idx;
}

// Registers the shared unit box hull. Does nothing if it has already been added.
void PxgGeometryManager::addBoxHull()
{
	if (mBoxHullIdx != 0xFFffFFff)
		return;

	PxU64 byteSize = computeBoxHullByteSize();

	mBoxHullIdx = addGeometryInternal(byteSize, NULL, UploadGeometryType::eBOXHULL);
}

// Registers a convex hull for upload and returns its geometry index.
PxU32 PxgGeometryManager::addHull(const ConvexHullData& hull)
{
	PxU32 numPolyVertices;
	PxU64 byteSize = computeHullByteSize(hull, numPolyVertices);

	return addGeometryInternal(byteSize, &hull, UploadGeometryType::eCONVEXHULL, numPolyVertices);
}

// Removes the geometry with the given index: cancels a still-pending upload, destroys the SDF
// textures created for it (if any), frees the device memory and recycles the index.
void PxgGeometryManager::removeGeometry(PxU32 idx)
{
	PX_ASSERT(idx < mGeometryData.size());
	HullOrMeshData geometryToRemove = mGeometryData[idx];

	PxU32 scheduledCopyIndex = geometryToRemove.mCopyDescIndex;
	if (scheduledCopyIndex != HullOrMeshData::INVALID_COPY_DESC_INDEX)
	{
		PX_ASSERT(scheduledCopyIndex < mScheduledCopies.size());

		// The upload never happens, so its staging bytes must not be counted anymore.
		// Keeps mPinnedHostMemoryRequirements equal to the sum of all pending copy sizes.
		const PxU64 pendingBytes = mScheduledCopies[scheduledCopyIndex].mCopyDesc.bytes;
		PX_ASSERT(mPinnedHostMemoryRequirements >= pendingBytes);
		mPinnedHostMemoryRequirements -= pendingBytes;

		PxU32 lastScheduledCopyHullOrTrimeshIndex = mScheduledCopies.back().mHullOrTrimeshIdx;
		if (lastScheduledCopyHullOrTrimeshIndex != idx)
		{
			// the last entry moves into the freed slot, so its owner has to be re-pointed
			mScheduledCopies.replaceWithLast(scheduledCopyIndex);
			PX_ASSERT(lastScheduledCopyHullOrTrimeshIndex < mGeometryData.size());
			mGeometryData[lastScheduledCopyHullOrTrimeshIndex].mCopyDescIndex = scheduledCopyIndex;
		}
		else
		{
			mScheduledCopies.popBack();
		}
	}

	const PxPair<void* const, PxgMeshTextureData>* pair = mMeshToTextureMap.find(geometryToRemove.mDeviceMemPointer);
	if (pair)
	{
		// destroy each texture object before the array it references
		cuTexObjectDestroy(pair->second.cuTexRef);
		cuArrayDestroy(pair->second.cuArray);
		if (pair->second.cuArraySubgrids)
		{
			cuTexObjectDestroy(pair->second.cuTexRefSubgrids);
			cuArrayDestroy(pair->second.cuArraySubgrids);
		}
		mMeshToTextureMap.erase(geometryToRemove.mDeviceMemPointer);
	}

	mDeviceMemoryAllocator->deallocate(geometryToRemove.mDeviceMemPointer);
	mFreeGeometryIndices.setFreeIndex(idx);
}

// Lays out every scheduled geometry in one freshly allocated pinned buffer and pushes one
// deferred host-to-device copy per geometry to the copy manager. SDF textures created while
// laying out triangle meshes are remembered per device pointer so removeGeometry can destroy them.
void PxgGeometryManager::scheduleCopyHtoD(PxgCopyManager& copyMan, PxCudaContext& cudaContext, CUstream stream)
{
	// allocate the proper amount of pinned memory
	mPinnedMemoryBasePtr = mPinnedMemoryAllocator->allocate(mPinnedHostMemoryRequirements, PxsHeapStats::eNARROWPHASE, PX_FL);

	PxU8* basePtr = reinterpret_cast<PxU8*>(mPinnedMemoryBasePtr);

	const PxU32 nbScheduledCopies = mScheduledCopies.size();
	for (PxU32 i = 0; i < nbScheduledCopies; ++i)
	{
		ScheduledCopyData& scheduled = mScheduledCopies[i];

		// carve this geometry's slice out of the staging buffer
		const PxU64 byteSize = scheduled.mCopyDesc.bytes;
		PxU8* mem = basePtr;
		basePtr += byteSize;

		PX_ASSERT((byteSize & 255) == 0);
		PX_ASSERT(((size_t)mem & 255) == 0);

		// layout the geometry in pinned memory depending on type
		switch (scheduled.mType)
		{
			case UploadGeometryType::eTRIANGLEMESH:
			{
				PxgMeshTextureData texData = layOutTriMesh(mem, *reinterpret_cast<const TriangleMesh*>(scheduled.mGeometryPtr), stream);
				if (texData.cuArray)
				{
					PX_ASSERT(scheduled.mHullOrTrimeshIdx < mGeometryData.size());
					mMeshToTextureMap.insert(mGeometryData[scheduled.mHullOrTrimeshIdx].mDeviceMemPointer, texData);
				}
			}
			break;
			case UploadGeometryType::eCONVEXHULL:
				layOutHull(mem, *reinterpret_cast<const ConvexHullData*>(scheduled.mGeometryPtr), scheduled.mNumPolyVertices);
				break;
			case UploadGeometryType::eHEIGHTFIELD:
				layOutHeightfield(mem, *reinterpret_cast<const HeightFieldData*>(scheduled.mGeometryPtr));
				break;
			case UploadGeometryType::eBOXHULL:
				layOutBoxHull(mem);
				break;
			default:
				PX_ALWAYS_ASSERT();
		}

		// set the source pointer in the copy descriptor.
		scheduled.mCopyDesc.source = reinterpret_cast<size_t>(getMappedDevicePtr(&cudaContext, mem));

		// schedule the copies. From here on the geometry no longer owns a pending copy entry.
		PX_ASSERT(scheduled.mHullOrTrimeshIdx < mGeometryData.size());
		mGeometryData[scheduled.mHullOrTrimeshIdx].mCopyDescIndex = HullOrMeshData::INVALID_COPY_DESC_INDEX;
		copyMan.pushDeferredHtoD(scheduled.mCopyDesc);
	}

	mScheduledCopies.forceSize_Unsafe(0);
	mPinnedHostMemoryRequirements = 0;
}

void PxgGeometryManager::resetAfterMemcpyCompleted()
{
	// AD: we basically never send geometry each frame, and they're usually sized differently anyway, so let's give this memory back
	// to keep pinned memory usage under control.
	if (mPinnedMemoryBasePtr)
	{
		mPinnedMemoryAllocator->deallocate(mPinnedMemoryBasePtr);
		mPinnedMemoryBasePtr = NULL;
	}

	mFreeGeometryIndices.releaseFreeIndices();
}

// Returns the device address of the geometry stored at the given index.
CUdeviceptr PxgGeometryManager::getGeometryDevPtrByIndex(PxU32 idx) const
{
	PX_ASSERT(idx < mGeometryData.size());

	return reinterpret_cast<CUdeviceptr>(mGeometryData[idx].mDeviceMemPointer);
}

// Returns the device address of the shared box hull; addBoxHull must have been called before.
CUdeviceptr PxgGeometryManager::getBoxHullDevPtr() const
{
	PX_ASSERT(mBoxHullIdx != 0xFFffFFff);
	PX_ASSERT(mBoxHullIdx < mGeometryData.size());

	return reinterpret_cast<CUdeviceptr>(mGeometryData[mBoxHullIdx].mDeviceMemPointer);
}

// Registers a triangle mesh for upload and returns its geometry index.
PxU32 PxgGeometryManager::addTriMesh(const TriangleMesh& triMesh)
{
	PxU64 byteSize = computeTriMeshByteSize(triMesh);

	return addGeometryInternal(byteSize, &triMesh, UploadGeometryType::eTRIANGLEMESH);
}

// Registers a heightfield for upload and returns its geometry index.
PxU32 PxgGeometryManager::addHeightfield(const HeightFieldData& hf)
{
	PxU64 byteSize = computeHeightfieldByteSize(hf);

	return addGeometryInternal(byteSize, &hf, UploadGeometryType::eHEIGHTFIELD);
}

// static functions

// Byte size of the box hull representation written by layOutBoxHull
// (8 vertices, 6 polygons, 12 edges, 24 polygon vertex references).
static PxU64 computeBoxHullByteSize()
{
	PxU64 byteSize = sizeof(float4) +		//center of mass
		sizeof(PxU32) +						// NbEdgesNbHullVerticesNbPolygons
		3 * sizeof(PxU32) +					//pad
		sizeof(float4) +					//extents
		sizeof(float4) * 8 +				//vertices
		sizeof(float4) * 6 +				//planes
		sizeof(PxU32) * 6 +					//vRef8NbVertsMinIndex0
		sizeof(PxU16) * 2 * 12 +			//mVerticesByEdges16
		sizeof(PxU8) * 2 * 12 +				//mFacesByEdges8
		sizeof(PxU8) * 3 * 8 +				//mFacesByVertices8
		sizeof(PxU8) * 24;					//vertexData8

	return byteSize;
}

// Byte size of the convex hull representation written by layOutHull.
// Also returns the summed vertex count of all polygons in numPolyVertices.
static PxU64 computeHullByteSize(const ConvexHullData& hull, PxU32& numPolyVertices)
{
	numPolyVertices = 0;

	for (PxU32 i = 0; i < hull.mNbPolygons; ++i)
	{
		numPolyVertices += hull.mPolygons[i].mNbVerts;
	}

	return sizeof(float4) +							//center of mass
		sizeof(PxU32) +								// NbEdgesNbHullVerticesNbPolygons
		3 * sizeof(PxU32) +							//pad
		sizeof(float4) +							//extents
		sizeof(float4) * hull.mNbHullVertices +		//vertices
		sizeof(float4) * hull.mNbPolygons +			//planes
		sizeof(PxU32) * hull.mNbPolygons +			//vRef8NbVertsMinIndex0
		sizeof(PxU16) * 2 * hull.mNbEdges +			//mVerticesByEdges16
		sizeof(PxU8) * 2 * hull.mNbEdges +			//mFacesByEdges8
		sizeof(PxU8) * 3 * hull.mNbHullVertices +	//mFacesByVertices8
		sizeof(PxU8) * numPolyVertices;				//vertexData8
}

// Byte size of the triangle mesh representation written by layOutTriMesh
// (mesh tables, SDF header and optional SDF block, BV32 nodes).
static PxU64 computeTriMeshByteSize(const TriangleMesh& triMesh)
{
	const PxU32 numTris = triMesh.getNbTrianglesFast();
	const PxU32 numVerts = triMesh.getNbVerticesFast();

	PxU64 meshDataSize =
		sizeof(uint4)							// (nbVerts, nbTris, meshAdjVerticiesTotal, nbBv32TreeNodes)
		+ sizeof(float4) * numVerts				// meshVerts
		+ sizeof(uint4) * numTris				// meshTriIndices
		+ sizeof(uint4) * numTris				// meshTriAdjacencies
		+ sizeof(PxU32) * numTris				// gpu to cpu remap table
		+ sizeof(PxU32) * numTris * 3			// vertex to triangle remap table
		+ sizeof(PxU32) * (numVerts + 1)		// vertex to triangle offset table
		;

	// Make sure memory is always 16-byte aligned
	meshDataSize = (meshDataSize + 15) & ~15;

	meshDataSize += sizeof(uint4);				// (sdfDimX, sdfDimY, sdfDimZ, 0)

	const SDF& sdfData = triMesh.getSdfDataFast();
	const PxU32 numSdfs = sdfData.mNumSdfs;

	if (numSdfs > 0)
	{
		meshDataSize +=
			+ sizeof(float4)					// (meshLower.x, meshLower.y, meshLower.z, spacing)
			+ sizeof(uint4)
			+ sizeof(CUtexObject)				// SDF texture object reference
			+ sizeof(CUtexObject)
			+ sizeof(PxU32) * sdfData.mNumStartSlots;
	}

	// Make sure memory is always 16-byte aligned
	meshDataSize = (meshDataSize + 15) & ~15;

	//ML: don't know whether we need to have local bound
	const PxU64 bv32Size = triMesh.mGRB_BV32Tree->mNbPackedNodes * sizeof(BV32DataPacked);

	return meshDataSize + bv32Size;
}

static PxU64 computeHeightfieldByteSize(const HeightFieldData& hf)
{
	/* Height field height data is 16 bit signed integers, followed by triangle materials.

	Each sample is 32 bits wide arranged as follows:

	1) First there is a 16 bit height value.
	2) Next, two one byte material indices, with the high bit of each byte reserved for special use.
	(so the material index is only 7 bits).
	The high bit of material0 is the tess-flag.
	The high bit of material1 is reserved for future use.
	*/

	return sizeof(PxU32)						//rows
		+ sizeof(PxU32)							//columns
		+ sizeof(PxU32) * hf.columns * hf.rows
		+ sizeof(PxU16);						//PxHeightFieldFlags
}

// Writes the box hull with half-extents 1 into mem, using the layout that
// computeBoxHullByteSize accounts for.
static void layOutBoxHull(void* mem)
{
	const float4 vertices[8] = {
		make_float4(-1.f, 1.f, -1.f, 0.f),
		make_float4(1.f, 1.f, -1.f, 0.f),
		make_float4(1.f, -1.f, -1.f, 0.f),
		make_float4(-1.f, -1.f, -1.f, 0.f),
		make_float4(-1.f, -1.f, 1.f, 0.f),
		make_float4(-1.f, 1.f, 1.f, 0.f),
		make_float4(1.f, -1.f, 1.f, 0.f),
		make_float4(1.f, 1.f, 1.f, 0.f)
	};

	const float4 planes[6] = {
		make_float4(0.f, 0.0f, -1.0f, -1.0f),
		make_float4(-1.0f, 0.0f, 0.0f, -1.0f),
		make_float4(0.0f, -1.0f, 0.0f, -1.0f),
		make_float4(0.0f, 1.0f, 0.0f, -1.0f),
		make_float4(0.0f, 0.0f, 1.0f, -1.0f),
		make_float4(1.0f, 0.0f, 0.0f, -1.0f)
	};

	const PxU32 polyData[6] = {
		merge(0, 4, 4),
		merge(4, 4, 1),
		merge(8, 4, 0),
		merge(12, 4, 2),
		merge(16, 4, 0),
		merge(20, 4, 0)
	};

	const PxU16 vertsByEdges[24] = {
		0, 1,
		3, 0,
		5, 0,
		1, 2,
		7, 1,
		2, 3,
		2, 6,
		3, 4,
		4, 5,
		6, 4,
		5, 7,
		6, 7
	};

	const PxU8 facesByEdges[24] = {
		0, 3,
		0, 1,
		1, 3,
		0, 5,
		3, 5,
		0, 2,
		2, 5,
		1, 2,
		1, 4,
		2, 4,
		3, 4,
		4, 5
	};

	const PxU8 facesByVerts[24] = {
		0, 1, 3,
		0, 3, 5,
		0, 2, 5,
		0, 1, 2,
		1, 2, 4,
		1, 3, 4,
		2, 4, 5,
		3, 4, 5
	};

	const PxU8 polyIndices[24] = {
		0, 1, 2, 3,
		4, 5, 0, 3,
		4, 3, 2, 6,
		7, 1, 0, 5,
		7, 5, 4, 6,
		7, 6, 2, 1
	};

	PxU8* m = reinterpret_cast<PxU8*>(mem);

	*((float4*)m) = make_float4(0.0f, 0.0f, 0.0f, 0.0f);	//center of mass
	m += sizeof(float4);

	// 12 edges, 8 hull vertices, 6 polygons; the remaining three PxU32 are padding
	*((PxU32*)m) = merge(12, merge((PxU8) 8, (PxU8) 6));
	m += 4 * sizeof(PxU32);

	*((float4*)m) = make_float4(1.0f,	//half-extents and the insphere radius
		1.0f,
		1.0f,
		1.0f);
	m += sizeof(float4);

	PxMemCopy(m, vertices, sizeof(float4) * 8);
	m += sizeof(float4) * 8;

	PxMemCopy(m, planes, sizeof(float4) * 6);
	m += sizeof(float4) * 6;

	PxMemCopy(m, polyData, sizeof(PxU32) * 6);
	m += sizeof(PxU32) * 6;

	PxMemCopy(m, vertsByEdges, sizeof(PxU16) * 2 * 12);
	m += sizeof(PxU16) * 2 * 12;

	PxMemCopy(m, facesByEdges, sizeof(PxU8) * 2 * 12);
	m += sizeof(PxU8) * 2 * 12;

	PxMemCopy(m, facesByVerts, sizeof(PxU8) * 3 * 8);
	m += sizeof(PxU8) * 3 * 8;

	PxMemCopy(m, polyIndices, sizeof(PxU8) * 24);
}

// Writes the convex hull into mem, using the layout that computeHullByteSize accounts for.
// numPolyVertices must be the value computeHullByteSize returned for the same hull.
static void layOutHull(void* mem, const ConvexHullData& hull, PxU32 numPolyVertices)
{
	PxU8* m = (PxU8*) mem;

	*((float4*)m) = make_float4(hull.mCenterOfMass.x, hull.mCenterOfMass.y, hull.mCenterOfMass.z, 0.f);
	m += sizeof(float4);

	// packed counts; the remaining three PxU32 are padding
	*((PxU32*)m) = merge(hull.mNbEdges, merge(hull.mNbHullVertices, hull.mNbPolygons));
	m += 4 * sizeof(PxU32);

	*((float4*)m) = make_float4(hull.mInternal.mInternalExtents.x,
		hull.mInternal.mInternalExtents.y,
		hull.mInternal.mInternalExtents.z,
		hull.mInternal.mInternalRadius);
	m += sizeof(float4);

	// vertices, widened from PxVec3 to float4
	const PxVec3* verts = hull.getHullVertices();
	for (const PxVec3* end = verts + hull.mNbHullVertices; verts < end; ++verts)
	{
		*((float4*)m) = make_float4(verts->x, verts->y, verts->z, 0);
		m += sizeof(float4);
	}

	// all planes come first, followed by all packed per-polygon words
	const HullPolygonData* polys = hull.mPolygons;
	float4* planes = (float4*)m;
	PxU32* vRef8NbVertsMinIndex = (PxU32*) (m + sizeof(float4) * hull.mNbPolygons);
	for (const HullPolygonData* end = polys + hull.mNbPolygons; polys < end; ++polys)
	{
		*(planes++) = make_float4(polys->mPlane.n.x, polys->mPlane.n.y, polys->mPlane.n.z, polys->mPlane.d);
		*(vRef8NbVertsMinIndex++) = merge(polys->mVRef8, polys->mNbVerts, polys->mMinIndex);
	}
	m += (sizeof(float4) + sizeof(PxU32)) * hull.mNbPolygons;

	PxMemCopy(m, hull.getVerticesByEdges16(), sizeof(PxU16) * 2 * hull.mNbEdges);
	m += sizeof(PxU16) * 2 * hull.mNbEdges;

	PxMemCopy(m, hull.getFacesByEdges8(), sizeof(PxU8) * 2 * hull.mNbEdges);
	m += sizeof(PxU8) * 2 * hull.mNbEdges;

	PxMemCopy(m, hull.getFacesByVertices8(), sizeof(PxU8) * 3 * hull.mNbHullVertices);
	m += sizeof(PxU8) * 3 * hull.mNbHullVertices;

	PxMemCopy(m, hull.getVertexData8(), sizeof(PxU8) * numPolyVertices);
}

// Writes the triangle mesh into mem, using the layout that computeTriMeshByteSize accounts for,
// and creates the SDF texture(s) if the mesh has SDF data. The returned struct holds the created
// CUDA arrays and texture objects; it is all zero when no texture was created.
static PxgMeshTextureData layOutTriMesh(void* mem, const TriangleMesh& triMesh, CUstream stream)
{
	const PxU32 numTris = triMesh.getNbTrianglesFast();
	const PxU32 numVerts = triMesh.getNbVerticesFast();

	BV32Tree* bv32Tree = triMesh.mGRB_BV32Tree;

	PxU8* m = (PxU8*) mem;
	*((uint4*)m) = make_uint4(triMesh.getNbVerticesFast(), triMesh.getNbTrianglesFast(), 0, bv32Tree->mNbPackedNodes);
	m += sizeof(uint4);

	//Midphase
	PxMemCopy(m, bv32Tree->mPackedNodes, sizeof(BV32DataPacked) * bv32Tree->mNbPackedNodes);
	m += sizeof(BV32DataPacked) * bv32Tree->mNbPackedNodes;

	PX_ASSERT(bv32Tree->mNbPackedNodes > 0);

	// Core: Mesh data
	//construct gpu friendly verts
	float4* grbVerts = reinterpret_cast<float4*>(m);
	const PxVec3* verts = triMesh.getVerticesFast();

	for (PxU32 i = 0; i < numVerts; ++i)
	{
		grbVerts[i].x = verts[i].x;
		grbVerts[i].y = verts[i].y;
		grbVerts[i].z = verts[i].z;
		grbVerts[i].w = 0.f;
	}

	m += numVerts * sizeof(float4);

	//copy triangle indices, widened to one uint4 per triangle
	uint4* grbTriInd = reinterpret_cast<uint4*>(m);
	if (triMesh.has16BitIndices())
	{
		const PxU16* triInds = reinterpret_cast<const PxU16*>(triMesh.mGRB_triIndices);
		for (PxU32 i = 0; i < numTris; ++i)
		{
			grbTriInd[i].x = triInds[3 * i + 0];
			grbTriInd[i].y = triInds[3 * i + 1];
			grbTriInd[i].z = triInds[3 * i + 2];
			grbTriInd[i].w = 0;
		}
	}
	else
	{
		const PxU32* triInds = reinterpret_cast<const PxU32*>(triMesh.mGRB_triIndices);
		for (PxU32 i = 0; i < numTris; ++i)
		{
			grbTriInd[i].x = triInds[3 * i + 0];
			grbTriInd[i].y = triInds[3 * i + 1];
			grbTriInd[i].z = triInds[3 * i + 2];
			grbTriInd[i].w = 0;
		}
	}

	m += numTris * sizeof(uint4);

	//Adjacencies data for contact gen
	PxMemCopy(m, triMesh.mGRB_triAdjacencies, numTris * sizeof(uint4));
	m += numTris * sizeof(uint4);

	//GPU to CPU remap table
	PxMemCopy(m, triMesh.mGRB_faceRemap, numTris * sizeof(PxU32));
	m += numTris * sizeof(PxU32);

	//Vertex to triangle offset table: numVerts accumulated counts plus the total as terminating entry
	PxMemCopy(m, triMesh.mAccumulatedTrianglesRef, numVerts * sizeof(PxU32));
	reinterpret_cast<PxU32*>(m)[numVerts] = triMesh.mNbTrianglesReferences;
	m += (numVerts + 1) * sizeof(PxU32);

	//Vertex to triangle remap table
	PxMemCopy(m, triMesh.mTrianglesReferences, (numTris * 3) * sizeof(PxU32));
	m += (numTris * 3) * sizeof(PxU32);

	// Make sure m is 16-byte aligned
	m = (PxU8*)(((size_t)m + 15) & ~15);

	// Copy sdf values
	const SDF& sdfData = triMesh.getSdfDataFast();

	*((uint4*)m) = make_uint4(sdfData.mDims.x, sdfData.mDims.y, sdfData.mDims.z, triMesh.getPreferSDFProjection() ? 1 : 0);
	m += sizeof(uint4);

	const PxU32 numSdfs = sdfData.mNumSdfs;

	PxgMeshTextureData result;
	PxMemZero(&result, sizeof(result));

	if (numSdfs)
	{
		const PxVec3 meshLower = sdfData.mMeshLower;
		const PxReal spacing = sdfData.mSpacing;

		*((float4*)m) = make_float4(meshLower.x, meshLower.y, meshLower.z, spacing);
		m += sizeof(float4);

		// the min/max subgrid SDF values are stored as their raw 32 bit patterns
		*((uint4*)m) = make_uint4(sdfData.mSubgridSize,
			reinterpret_cast<const PxU32&>(sdfData.mSubgridsMinSdfValue),
			reinterpret_cast<const PxU32&>(sdfData.mSubgridsMaxSdfValue),
			sdfData.mNumStartSlots);
		m += sizeof(uint4);

		CUtexObject* pTexture = (CUtexObject*)m;
		m += sizeof(CUtexObject);

		CUtexObject* pTextureSubgrids = (CUtexObject*)m;
		m += sizeof(CUtexObject);

		// Both slots are part of the uploaded data. Zero them so that a slot for which no texture
		// gets created (dense SDF, unsupported pixel size) never carries uninitialized bytes.
		*pTexture = 0;
		*pTextureSubgrids = 0;

		if (sdfData.mSubgridSize > 0)
			PxMemCopy(m, sdfData.mSubgridStartSlots, sdfData.mNumStartSlots * sizeof(PxU32));
		m += sizeof(PxU32) * sdfData.mNumStartSlots;

		if (sdfData.mSubgridSize == 0)
		{
			// dense SDF: one texture with the full SDF dimensions
			CUarray cuArray = 0;
			createTextureObject<PxReal>(CU_AD_FORMAT_FLOAT, pTexture, cuArray, sdfData.mDims.x, sdfData.mDims.y, sdfData.mDims.z, sdfData.mSdf, stream);

			result.cuArray = cuArray;
			result.cuTexRef = *pTexture;
			result.cuArraySubgrids = 0;
			result.cuTexRefSubgrids = 0;
		}
		else
		{
			// sparse SDF: a coarse texture plus a subgrid texture
			PxU32 x = sdfData.mDims.x / sdfData.mSubgridSize;
			PxU32 y = sdfData.mDims.y / sdfData.mSubgridSize;
			PxU32 z = sdfData.mDims.z / sdfData.mSubgridSize;

			CUarray cuArray = 0;
			createTextureObject<PxReal>(CU_AD_FORMAT_FLOAT, pTexture, cuArray, x + 1, y + 1, z + 1, sdfData.mSdf, stream);

			const PxU32 subgridsWidth = sdfData.mSdfSubgrids3DTexBlockDim.x * (sdfData.mSubgridSize + 1);
			const PxU32 subgridsHeight = sdfData.mSdfSubgrids3DTexBlockDim.y * (sdfData.mSubgridSize + 1);
			const PxU32 subgridsDepth = sdfData.mSdfSubgrids3DTexBlockDim.z * (sdfData.mSubgridSize + 1);

			CUarray cuArraySubgrids = 0;
			switch (sdfData.mBytesPerSparsePixel)
			{
			case 1:
				createTextureObject<PxU8>(CU_AD_FORMAT_UNSIGNED_INT8, pTextureSubgrids, cuArraySubgrids,
					subgridsWidth, subgridsHeight, subgridsDepth, sdfData.mSubgridSdf, stream);
				break;
			case 2:
				createTextureObject<PxU16>(CU_AD_FORMAT_UNSIGNED_INT16, pTextureSubgrids, cuArraySubgrids,
					subgridsWidth, subgridsHeight, subgridsDepth, reinterpret_cast<PxU16*>(sdfData.mSubgridSdf), stream);
				break;
			case 4:
				createTextureObject<PxReal>(CU_AD_FORMAT_FLOAT, pTextureSubgrids, cuArraySubgrids,
					subgridsWidth, subgridsHeight, subgridsDepth, reinterpret_cast<PxReal*>(sdfData.mSubgridSdf), stream);
				break;
			default:
				// unsupported pixel size: no subgrid texture, the slot stays zero
				break;
			}

			result.cuArray = cuArray;
			result.cuTexRef = *pTexture;

			if (cuArraySubgrids)
			{
				result.cuArraySubgrids = cuArraySubgrids;
				result.cuTexRefSubgrids = *pTextureSubgrids;
			}
			else
			{
				result.cuArraySubgrids = 0;
				result.cuTexRefSubgrids = 0;
			}
		}
	}
	return result;
}

// Writes the heightfield into mem: rows, columns, all samples (one PxU32 each), then the flags.
static void layOutHeightfield(void* mem, const HeightFieldData& hf)
{
	PxU8* m = (PxU8*) mem;
	PxU32 nbRows = hf.rows;
	PxU32 nbCols = hf.columns;

	*((PxU32* ) m) = nbRows;
	m += sizeof(PxU32);

	*((PxU32* ) m) = nbCols;
	m += sizeof(PxU32);

	// the samples are copied as raw 32 bit words
	PX_ASSERT(sizeof(PxU32) == sizeof(PxHeightFieldSample));

	PxMemCopy(m, hf.samples, sizeof(PxU32) * nbRows * nbCols);
	m += sizeof(PxU32) * nbRows * nbCols;

	*((PxU16*)m) = hf.flags;
}

// Creates a single-channel 3D CUDA array of the given format and size, enqueues a copy of the
// tightly packed host data into it on the given stream and creates a texture object
// (clamp addressing, linear filtering) for it.
// - texture: must point to the slot that receives the texture object handle.
// - cuArray: receives the created array.
// If any dimension is zero, or the array cannot be created, no texture is created and both
// *texture and cuArray are set to 0.
template<typename T>
static void createTextureObject(CUarray_format format, CUtexObject*& texture, CUarray& cuArray, PxU32 width, PxU32 height, PxU32 depth, const T* data, CUstream stream)
{
	PX_ASSERT(texture);

	// Report "no texture" through the slot itself. The caller's pointer stays valid because
	// callers read the handle back through it.
	*texture = 0;
	cuArray = 0;

	if (width == 0 || height == 0 || depth == 0)
		return;

	CUDA_ARRAY3D_DESCRIPTOR_st arrDesc;
	arrDesc.Format = format;
	arrDesc.NumChannels = 1;
	arrDesc.Width = width;
	arrDesc.Height = height;
	arrDesc.Depth = depth;
	arrDesc.Flags = 0;

	CUresult r = cuArray3DCreate(&cuArray, &arrDesc);
	PX_UNUSED(r);
	PX_ASSERT(r == CUDA_SUCCESS);
	if (r != CUDA_SUCCESS)
	{
		// without an array there is nothing to copy into or to bind
		cuArray = 0;
		return;
	}

	CUDA_MEMCPY3D copyParam;
	PxMemZero(&copyParam, sizeof(copyParam));
	copyParam.dstMemoryType = CU_MEMORYTYPE_ARRAY;
	copyParam.dstArray = cuArray;
	copyParam.srcMemoryType = CU_MEMORYTYPE_HOST;
	copyParam.srcHost = data;
	copyParam.srcPitch = width * sizeof(T);
	copyParam.srcHeight = height;
	copyParam.WidthInBytes = copyParam.srcPitch;
	copyParam.Height = height;
	copyParam.Depth = depth;

	r = cuMemcpy3DAsync(&copyParam, stream);
	PX_ASSERT(r == CUDA_SUCCESS);

	CUDA_RESOURCE_DESC resDesc;
	PxMemZero(&resDesc, sizeof(resDesc));
	resDesc.resType = CU_RESOURCE_TYPE_ARRAY;
	resDesc.res.array.hArray = cuArray;

	CUDA_TEXTURE_DESC texDesc;
	PxMemZero(&texDesc, sizeof(texDesc));
	texDesc.addressMode[0] = CU_TR_ADDRESS_MODE_CLAMP;
	texDesc.addressMode[1] = CU_TR_ADDRESS_MODE_CLAMP;
	texDesc.addressMode[2] = CU_TR_ADDRESS_MODE_CLAMP;
	texDesc.filterMode = CU_TR_FILTER_MODE_LINEAR;

	r = cuTexObjectCreate(texture, &resDesc, &texDesc, NULL);
	PX_ASSERT(r == CUDA_SUCCESS);
}