Files
XCEngine/engine/third_party/physx/source/gpucommon/src/PxgMemCopyDispatcher.cpp

68 lines
2.9 KiB
C++

// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
#include "PxgMemCopyDispatcher.h"
#include "cudamanager/PxCudaContext.h"
#include "CudaKernelWrangler.h"
#include "PxgKernelIndices.h"
namespace physx
{
void PxgMemCopyDispatcher::flushCommands(CUstream stream, PxCudaContext* cudaContext, KernelWrangler* kernelWrangler)
{
// AD - this assumes the context lock is already held?
if (mPinnedCopyBuffer.size())
{
mDeviceCopyCommands.allocate(mPinnedCopyBuffer.size() * sizeof(PxgPtrPair), PX_FL);
cudaContext->memcpyHtoDAsync(mDeviceCopyCommands.getDevicePtr(), mPinnedCopyBuffer.begin(), mPinnedCopyBuffer.size() * sizeof(PxgPtrPair), stream);
CUfunction function = kernelWrangler->getCuFunction(PxgKernelIds::COPY_USER_DATA);
PX_ASSERT(mMaxSize <= PX_MAX_U32);
const PxU32 maxS = PxU32(mMaxSize);
const PxU32 blockSize = 256;
const PxU32 numBlocks = ((maxS/4) + blockSize-1)/ blockSize;
CUdeviceptr ptr = mDeviceCopyCommands.getDevicePtr();
PxU32 count = mPinnedCopyBuffer.size();
PxCudaKernelParam kernelParams[] =
{
PX_CUDA_KERNEL_PARAM(ptr),
PX_CUDA_KERNEL_PARAM(count)
};
CUresult launchResult = cudaContext->launchKernel(function, numBlocks, count, 1, 256, 1, 1, 0, stream, kernelParams, sizeof(kernelParams), 0, PX_FL);
PX_ASSERT(launchResult == CUDA_SUCCESS);
PX_UNUSED(launchResult);
}
mPinnedCopyBuffer.forceSize_Unsafe(0);
mMaxSize = 0;
}
}