Files
XCEngine/engine/third_party/physx/source/cudamanager/src/CudaKernelWrangler.cpp

232 lines
7.0 KiB
C++

// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Copyright (c) 2008-2025 NVIDIA Corporation. All rights reserved.
#include <stdio.h>
#include "foundation/PxPreprocessor.h"
#include "foundation/PxAssert.h"
#include "foundation/PxErrorCallback.h"
#include "foundation/PxString.h"
// from the point of view of this source file the GPU library is linked statically
#ifndef PX_PHYSX_GPU_STATIC
#define PX_PHYSX_GPU_STATIC
#endif
#include "PxPhysXGpu.h"
#if PX_LINUX && PX_CLANG
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wdocumentation"
#pragma clang diagnostic ignored "-Wdisabled-macro-expansion"
#endif
#include <cuda.h>
#if PX_LINUX && PX_CLANG
#pragma clang diagnostic pop
#endif
#include <texture_types.h>
#include <vector_types.h>
#include "cudamanager/PxCudaContextManager.h"
#include "cudamanager/PxCudaContext.h"
#include "CudaKernelWrangler.h"
using namespace physx;
const char* KernelWrangler::getCuFunctionName(uint16_t funcIndex) const
{
// PT: this is wrong, functions there are not listed in the same order as mCuFunctions
//return gFunctionTable[funcIndex].functionName;
return mKernelNames[funcIndex];
}
// Resolves a CUfunction handle for every requested kernel name.
//
// For each entry of funcNames the registered CUDA function table is searched for an entry
// with the same name; the module index stored in that entry selects the CUmodule from which
// the function handle is fetched. Handles are stored in mCuFunctions at the same index as
// the name. Slots that could not be resolved are left NULL.
//
// cudaContextManager: provides the CUDA context, the module array and context acquisition.
// errorCallback:      receives an eINTERNAL_ERROR report for every failure.
// funcNames:          array of numFuncs kernel names. Only the pointer is stored (no copy),
//                     so the array must outlive this object.
// numFuncs:           number of entries in funcNames.
//
// Any failure sets mError to true; construction itself always completes.
KernelWrangler::KernelWrangler(PxCudaContextManager& cudaContextManager, PxErrorCallback& errorCallback, const char** funcNames, uint16_t numFuncs)
	: mError(false)
	, mKernelNames(NULL)
	, mCuFunctions("CuFunctions")
	, mCudaContextManager(cudaContextManager)
	, mCudaContext(cudaContextManager.getCudaContext())
	, mErrorCallback(errorCallback)
{
	// PT: TODO: consider revisiting this code so that the function order is kept the same between mCuFunctions and gFunctionTable.
	// That way the initial getCuFunctionName implementation could be kept and we could decouple the code from the external funcNames array again.

	// PT: proper names defined in PxgKernelWrangler.cpp. We assume the data there remains valid for the lifetime of the app and we don't need to do a copy.
	PX_ASSERT(funcNames);
	mKernelNames = funcNames;

	// matchup funcNames to CUDA modules, get CUfunction handles
	CUmodule* cuModules = cudaContextManager.getCuModules();
	PxKernelIndex* cuFunctionTable = PxGpuGetCudaFunctionTable();
	const PxU32 cuFunctionTableSize = PxGpuGetCudaFunctionTableSize();

	// Every slot starts out NULL so unresolved functions are recognisable afterwards.
	mCuFunctions.resize(numFuncs, NULL);

	// PX_ASSERT compiles out in release builds; without this guard the loop below would
	// dereference a NULL name array there.
	if (!funcNames && numFuncs)
	{
		char buffer[256];
		Pxsnprintf(buffer, 256, "No CUDA function names were provided.");
		mErrorCallback.reportError(PxErrorCode::eINTERNAL_ERROR, buffer, PX_FL);
		mError = true;
		return;
	}

	if (!mCudaContextManager.tryAcquireContext())
	{
		char buffer[256];
		Pxsnprintf(buffer, 256, "Failed to acquire the cuda context.");
		mErrorCallback.reportError(PxErrorCode::eINTERNAL_ERROR, buffer, PX_FL);
		mError = true;
		return;
	}

	for (uint32_t i = 0; i < numFuncs; ++i)
	{
		const char* const kernelName = funcNames[i];

		// search through all known functions
		PxU32 tableIndex = 0;
		while (tableIndex < cuFunctionTableSize && Pxstrcmp(cuFunctionTable[tableIndex].functionName, kernelName) != 0)
			++tableIndex;

		if (tableIndex == cuFunctionTableSize)
		{
			// The name is not in the registered function table at all.
			char buffer[256];
			Pxsnprintf(buffer, 256, "Could not find registered CUDA function '%s'.", kernelName);
			mErrorCallback.reportError(PxErrorCode::eINTERNAL_ERROR, buffer, PX_FL);
			mError = true;
			continue;
		}

		PxCUresult ret = mCudaContext->moduleGetFunction(&mCuFunctions[i], cuModules[cuFunctionTable[tableIndex].moduleIndex], kernelName);
		if (ret != CUDA_SUCCESS)
		{
			char buffer[256];
			Pxsnprintf(buffer, 256, "Could not find CUDA module containing function '%s'.", kernelName);
			mErrorCallback.reportError(PxErrorCode::eINTERNAL_ERROR, buffer, PX_FL);
			mError = true;

			// Do not keep whatever the failed lookup may have written into the slot.
			mCuFunctions[i] = NULL;
		}
	}

	mCudaContextManager.releaseContext();
}
/*
* Workaround hacks for using nvcc --compile output object files
* without linking with CUDART. We must implement our own versions
* of these functions that the object files are hard-coded to call into.
* These calls are all made _before_ main() during static initialization
* of this DLL.
*/
#include <driver_types.h>
// On Windows-family platforms CUDARTAPI is defined here as __stdcall; this file does not
// define it for other platforms.
// NOTE(review): presumably the non-Windows definition comes from an included CUDA header - confirm.
#if PX_WINDOWS_FAMILY
#define CUDARTAPI __stdcall
#endif
// Forward declarations of the types named in the stub signatures below.
struct uint3;
struct dim3;
// Replacement for the CUDART fat-binary registration entry point.
// Passes the fat binary pointer on to PxGpuCudaRegisterFatBinary and hands that
// call's result back to the caller as the fat cubin handle.
extern "C"
void** CUDARTAPI __cudaRegisterFatBinary(void* fatBin)
{
	void** const fatCubinHandle = PxGpuCudaRegisterFatBinary(fatBin);
	return fatCubinHandle;
}
// Replacement for the CUDART end-of-registration entry point.
// Deliberately does nothing; the handle argument is ignored.
extern "C"
void CUDARTAPI __cudaRegisterFatBinaryEnd(void** fatCubinHandle)
{
	PX_UNUSED(fatCubinHandle);
}
// Replacement for the CUDART fat-binary unregistration entry point.
// The handle is converted back to an integer and used as an index into the table
// returned by PxGpuGetCudaModuleTable(); that table entry is cleared to 0.
extern "C"
void CUDARTAPI __cudaUnregisterFatBinary(void** fatCubinHandle)
{
	const int moduleSlot = (int)(size_t) fatCubinHandle;

	// jcarius: not ideal because the module may still be loaded
	PxGpuGetCudaModuleTable()[moduleSlot] = 0;
}
// Replacement for the CUDART texture registration entry point.
// Intentionally empty: every argument is ignored and nothing is registered.
extern "C"
void CUDARTAPI __cudaRegisterTexture(
	void**,
	const struct textureReference*,
	const void**,
	const char*,
	int,
	int,
	int)
{
}
// Replacement for the CUDART variable registration entry point.
// Intentionally empty: every argument is ignored and nothing is registered.
extern "C"
void CUDARTAPI __cudaRegisterVar(
	void**,
	char*,
	char*,
	const char*,
	int,
	int,
	int,
	int)
{
}
// Replacement for the CUDART shared-variable registration entry point.
// Intentionally empty: both arguments are ignored.
extern "C"
void CUDARTAPI __cudaRegisterShared(
	void**,
	void**)
{
}
// Replacement for the CUDART kernel registration entry point.
// Only the fat cubin handle and the device-side name are used: the handle is converted
// to an int and passed, together with deviceName, to PxGpuCudaRegisterFunction.
// All remaining arguments are ignored.
extern "C"
void CUDARTAPI __cudaRegisterFunction(
	void** fatCubinHandle,
	const char*,
	char*,
	const char* deviceName,
	int,
	uint3*,
	uint3*,
	dim3*,
	dim3*,
	int*)
{
	const int handleValue = (int)(size_t) fatCubinHandle;
	PxGpuCudaRegisterFunction(handleValue, deviceName);
}
/* These functions are implemented just to resolve link dependencies */
// Link-time stub: exists only so the symbol resolves.
// Ignores its argument and always reports cudaSuccess; nothing is launched.
extern "C"
cudaError_t CUDARTAPI cudaLaunch(const char* /*entry*/)
{
	return cudaSuccess;
}
// Link-time stub: exists only so the symbol resolves.
// Ignores all arguments and always reports cudaSuccess; nothing is launched.
extern "C"
cudaError_t CUDARTAPI cudaLaunchKernel(
	const void*,
	dim3,
	dim3,
	void**,
	size_t,
	cudaStream_t)
{
	return cudaSuccess;
}
// Link-time stub: exists only so the symbol resolves.
// Ignores all arguments and always reports cudaSuccess.
extern "C"
cudaError_t CUDARTAPI cudaSetupArgument(
	const void*,
	size_t,
	size_t)
{
	return cudaSuccess;
}
// Link-time replacement for cudaCreateChannelDesc.
// Builds a cudaChannelFormatDesc by copying each argument into the member of the same
// name (x, y, z, w, f) and returns it by value.
extern "C"
struct cudaChannelFormatDesc CUDARTAPI cudaCreateChannelDesc(
	int x, int y, int z, int w, enum cudaChannelFormatKind f)
{
	cudaChannelFormatDesc channelDesc;

	// The assignments are independent of each other.
	channelDesc.f = f;
	channelDesc.x = x;
	channelDesc.y = y;
	channelDesc.z = z;
	channelDesc.w = w;

	return channelDesc;
}
// Link-time stub: exists only so the symbol resolves.
// Ignores all arguments, writes nothing through them, and always reports cudaSuccess.
extern "C"
cudaError_t CUDARTAPI __cudaPopCallConfiguration(dim3*, dim3*, size_t*, void*)
{
	return cudaSuccess;
}