Fix D3D12 NanoVDB volume load stalls

This commit is contained in:
2026-04-11 00:27:23 +08:00
parent be5dabd820
commit 4080b2e5fe
11 changed files with 1328 additions and 36 deletions

View File

@@ -17,7 +17,10 @@
#include "XCEngine/RHI/D3D12/D3D12ResourceView.h"
#include "XCEngine/RHI/D3D12/D3D12RenderPass.h"
#include "XCEngine/RHI/D3D12/D3D12Framebuffer.h"
#include "XCEngine/Debug/Logger.h"
#include <algorithm>
#include <chrono>
#include <cstring>
#include <stdio.h>
#include <memory>
#include <string>
@@ -41,11 +44,51 @@ std::string NarrowAscii(const std::wstring& value) {
return result;
}
// Forward declarations of the volume-trace helpers. Their definitions appear
// further down in this file; they are declared here so the shader compile
// helpers below can call them.
uint64_t GetVolumeTraceSteadyMs();
void LogVolumeTraceRendering(const std::string& message);
// Reports whether the descriptor carries anything to compile: either inline
// source data or a shader file name.
bool HasShaderPayload(const ShaderCompileDesc& desc) {
    if (!desc.source.empty()) {
        return true;
    }
    return !desc.fileName.empty();
}
// Decides whether a shader compile should emit volume-trace diagnostics.
//
// A descriptor qualifies when its (ASCII-narrowed) file name contains
// "volumetric", or when its inline source contains either the "PNANOVDB_HLSL"
// or the "VolumeData" marker.
bool ShouldTraceVolumetricShaderCompile(const ShaderCompileDesc& desc) {
    const std::string fileName = NarrowAscii(desc.fileName);
    if (fileName.find("volumetric") != std::string::npos) {
        return true;
    }

    if (desc.source.empty()) {
        return false;
    }

    // Search the source bytes in place. This predicate runs several times per
    // pipeline, so avoid copying the whole shader source into a temporary
    // string just to look for two short markers.
    const auto sourceContains = [&desc](const char* marker) {
        const char* const markerEnd = marker + std::strlen(marker);
        return std::search(desc.source.begin(), desc.source.end(), marker, markerEnd) !=
            desc.source.end();
    };

    return sourceContains("PNANOVDB_HLSL") || sourceContains("VolumeData");
}
// Builds a one-line "key=value" summary of a shader compile descriptor for
// trace output: entry point, profile, inline source size, macro count and,
// when one is set, the file name.
std::string DescribeShaderCompileDesc(const ShaderCompileDesc& desc) {
    std::string summary("entry=");
    summary += NarrowAscii(desc.entryPoint);
    summary += " profile=";
    summary += NarrowAscii(desc.profile);
    summary += " source_bytes=";
    summary += std::to_string(desc.source.size());
    summary += " macro_count=";
    summary += std::to_string(desc.macros.size());

    // The file name is optional; omit the field entirely when it is not set.
    if (!desc.fileName.empty()) {
        summary += " file=";
        summary += NarrowAscii(desc.fileName);
    }
    return summary;
}
bool CompileD3D12Shader(const ShaderCompileDesc& desc, D3D12Shader& shader) {
const bool traceShaderCompile = ShouldTraceVolumetricShaderCompile(desc);
const uint64_t compileStartMs = traceShaderCompile ? GetVolumeTraceSteadyMs() : 0u;
if (traceShaderCompile) {
LogVolumeTraceRendering(
"D3D12 shader compile begin steady_ms=" + std::to_string(compileStartMs) + " " +
DescribeShaderCompileDesc(desc));
}
const std::string entryPoint = NarrowAscii(desc.entryPoint);
const std::string profile = NarrowAscii(desc.profile);
const char* entryPointPtr = entryPoint.empty() ? nullptr : entryPoint.c_str();
@@ -76,19 +119,44 @@ bool CompileD3D12Shader(const ShaderCompileDesc& desc, D3D12Shader& shader) {
}
const D3D_SHADER_MACRO* macroPtr = macroTable.empty() ? nullptr : macroTable.data();
return shader.Compile(
const bool compiled = shader.Compile(
desc.source.data(),
desc.source.size(),
desc.fileName.empty() ? nullptr : desc.fileName.c_str(),
macroPtr,
entryPointPtr,
profilePtr);
if (traceShaderCompile) {
const uint64_t compileEndMs = GetVolumeTraceSteadyMs();
LogVolumeTraceRendering(
std::string("D3D12 shader compile ") + (compiled ? "end" : "failed") +
" steady_ms=" + std::to_string(compileEndMs) +
" total_ms=" + std::to_string(compileEndMs - compileStartMs) + " " +
DescribeShaderCompileDesc(desc));
}
return compiled;
}
if (!desc.fileName.empty()) {
return shader.CompileFromFile(desc.fileName.c_str(), entryPointPtr, profilePtr);
const bool compiled = shader.CompileFromFile(desc.fileName.c_str(), entryPointPtr, profilePtr);
if (traceShaderCompile) {
const uint64_t compileEndMs = GetVolumeTraceSteadyMs();
LogVolumeTraceRendering(
std::string("D3D12 shader compile ") + (compiled ? "end" : "failed") +
" steady_ms=" + std::to_string(compileEndMs) +
" total_ms=" + std::to_string(compileEndMs - compileStartMs) + " " +
DescribeShaderCompileDesc(desc));
}
return compiled;
}
if (traceShaderCompile) {
const uint64_t compileEndMs = GetVolumeTraceSteadyMs();
LogVolumeTraceRendering(
"D3D12 shader compile failed steady_ms=" + std::to_string(compileEndMs) +
" total_ms=" + std::to_string(compileEndMs - compileStartMs) +
" reason=empty_shader_payload " + DescribeShaderCompileDesc(desc));
}
return false;
}
@@ -318,6 +386,24 @@ bool IsSupportedBufferViewDimension(ResourceViewDimension dimension) {
dimension == ResourceViewDimension::RawBuffer;
}
// Returns the number of whole milliseconds elapsed on the steady clock since
// the first call to this function (the first call therefore reports ~0).
uint64_t GetVolumeTraceSteadyMs() {
    using Clock = std::chrono::steady_clock;
    using Milliseconds = std::chrono::milliseconds;

    // Captured once, on first use; all later readings are relative to it.
    static const Clock::time_point s_traceEpoch = Clock::now();

    const Clock::duration elapsed = Clock::now() - s_traceEpoch;
    const auto elapsedMs = std::chrono::duration_cast<Milliseconds>(elapsed).count();
    return static_cast<uint64_t>(elapsedMs);
}
// Reports whether a buffer creation should emit volume-trace diagnostics:
// only Storage buffers of at least 32 MiB qualify.
bool ShouldTraceLargeStorageBuffer(const BufferDesc& desc) {
    constexpr unsigned long long kLargeBufferThresholdBytes = 32ull * 1024ull * 1024ull;

    if (static_cast<BufferType>(desc.bufferType) != BufferType::Storage) {
        return false;
    }
    return desc.size >= kLargeBufferThresholdBytes;
}
// Writes `message` to the engine logger at Info level under the Rendering
// category, prefixed with the "[VolumeTrace] " tag.
void LogVolumeTraceRendering(const std::string& message) {
    const std::string taggedMessage = "[VolumeTrace] " + message;
    Containers::String entry(taggedMessage.c_str());
    Debug::Logger::Get().Info(Debug::LogCategory::Rendering, entry);
}
uint32_t ResolveBufferViewElementStride(RHIBuffer* buffer, const ResourceViewDesc& desc) {
if (desc.dimension == ResourceViewDimension::RawBuffer) {
return 4u;
@@ -796,6 +882,13 @@ RHIBuffer* D3D12Device::CreateBuffer(const BufferDesc& desc) {
}
}
if (ShouldTraceLargeStorageBuffer(desc)) {
LogVolumeTraceRendering(
"D3D12 CreateBuffer legacy size_bytes=" + std::to_string(desc.size) +
" heap=" + std::to_string(static_cast<int>(heapType)) +
" flags=" + std::to_string(static_cast<unsigned long long>(desc.flags)));
}
if (buffer->Initialize(m_device.Get(), desc.size, initialState, heapType, resourceFlags)) {
buffer->SetStride(desc.stride);
buffer->SetBufferType(bufferType);
@@ -806,6 +899,209 @@ RHIBuffer* D3D12Device::CreateBuffer(const BufferDesc& desc) {
return nullptr;
}
// Creates a default-heap buffer of `desc.size` bytes and fills it from
// `initialData` through a temporary upload-heap staging buffer.
//
// Behavior:
//  - With no initial data (null pointer or zero size) this delegates to
//    CreateBuffer(desc).
//  - Bytes past `initialDataSize` are zero-filled in the staging buffer, and
//    the full `desc.size` range is copied to the destination.
//  - After the copy the buffer is transitioned from COPY_DEST to `finalState`
//    (no barrier is recorded when `finalState` already maps to COPY_DEST).
//  - The call creates its own direct queue / allocator / command list, submits
//    the copy, and waits for that queue to become idle before returning.
//
// Parameters:
//  desc            - buffer description (size, stride, type, flags).
//  initialData     - source bytes to upload; may be null.
//  initialDataSize - number of bytes readable at `initialData`; must not
//                    exceed `desc.size`.
//  finalState      - resource state the returned buffer is left in.
//
// Returns the new buffer (caller owns it), or nullptr when the parameters are
// invalid, the buffer type is ReadBack, or any D3D12 object creation / mapping
// step fails.
//
// Large storage buffers (see ShouldTraceLargeStorageBuffer) additionally emit
// per-stage timing through LogVolumeTraceRendering.
RHIBuffer* D3D12Device::CreateBuffer(
    const BufferDesc& desc,
    const void* initialData,
    size_t initialDataSize,
    ResourceStates finalState) {
    const bool traceLargeStorageBuffer = ShouldTraceLargeStorageBuffer(desc);
    const uint64_t traceStartMs = traceLargeStorageBuffer ? GetVolumeTraceSteadyMs() : 0u;
    if (traceLargeStorageBuffer) {
        LogVolumeTraceRendering(
            "D3D12 CreateBuffer(initialData) begin steady_ms=" + std::to_string(traceStartMs) +
            " size_bytes=" + std::to_string(desc.size) +
            " initial_bytes=" + std::to_string(initialDataSize));
    }

    // Emits a trace line only when tracing is active for this buffer.
    const auto traceMessage = [traceLargeStorageBuffer](const char* message) {
        if (traceLargeStorageBuffer) {
            LogVolumeTraceRendering(message);
        }
    };

    // Nothing to upload: fall back to the plain creation path.
    if (initialData == nullptr || initialDataSize == 0u) {
        return CreateBuffer(desc);
    }

    if (m_device == nullptr ||
        desc.size == 0u ||
        initialDataSize > desc.size ||
        desc.size > static_cast<uint64_t>(SIZE_MAX)) {
        traceMessage("D3D12 CreateBuffer(initialData) rejected invalid parameters");
        return nullptr;
    }

    const BufferType bufferType = static_cast<BufferType>(desc.bufferType);
    if (bufferType == BufferType::ReadBack) {
        traceMessage("D3D12 CreateBuffer(initialData) rejected readback buffer");
        return nullptr;
    }

    const BufferFlags bufferFlags = static_cast<BufferFlags>(desc.flags);
    D3D12_RESOURCE_FLAGS resourceFlags = D3D12_RESOURCE_FLAG_NONE;
    if ((bufferFlags & BufferFlags::AllowUnorderedAccess) == BufferFlags::AllowUnorderedAccess) {
        resourceFlags = D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS;
    }

    // --- One-shot upload context: queue, allocator, command list. ---
    // Each failure path shuts down only the objects that were initialized.
    D3D12CommandQueue uploadQueue;
    if (!uploadQueue.Initialize(m_device.Get(), CommandQueueType::Direct)) {
        traceMessage("D3D12 CreateBuffer(initialData) failed stage=init_queue");
        return nullptr;
    }

    D3D12CommandAllocator uploadAllocator;
    if (!uploadAllocator.Initialize(m_device.Get(), CommandQueueType::Direct)) {
        traceMessage("D3D12 CreateBuffer(initialData) failed stage=init_allocator");
        uploadQueue.Shutdown();
        return nullptr;
    }

    D3D12CommandList uploadCommandList;
    if (!uploadCommandList.Initialize(m_device.Get(), CommandQueueType::Direct, uploadAllocator.GetCommandAllocator())) {
        traceMessage("D3D12 CreateBuffer(initialData) failed stage=init_command_list");
        uploadAllocator.Shutdown();
        uploadQueue.Shutdown();
        return nullptr;
    }

    // From here on the whole upload context exists; tear it down in reverse
    // order of creation.
    auto shutdownUploadContext = [&]() {
        uploadCommandList.Shutdown();
        uploadAllocator.Shutdown();
        uploadQueue.Shutdown();
    };

    uploadAllocator.Reset();
    uploadCommandList.Reset();
    const uint64_t commandSetupEndMs = traceLargeStorageBuffer ? GetVolumeTraceSteadyMs() : 0u;

    // --- Destination buffer on the default heap, created as a copy target. ---
    auto* buffer = new D3D12Buffer();
    if (!buffer->Initialize(
            m_device.Get(),
            desc.size,
            D3D12_RESOURCE_STATE_COPY_DEST,
            D3D12_HEAP_TYPE_DEFAULT,
            resourceFlags)) {
        traceMessage("D3D12 CreateBuffer(initialData) failed stage=create_default_buffer");
        delete buffer;
        shutdownUploadContext();
        return nullptr;
    }
    const uint64_t defaultBufferEndMs = traceLargeStorageBuffer ? GetVolumeTraceSteadyMs() : 0u;

    buffer->SetStride(desc.stride);
    buffer->SetBufferType(bufferType);
    buffer->SetState(ResourceStates::CopyDst);

    // Releases the destination buffer and the upload context after a failure
    // that happens once the destination buffer has been initialized.
    const auto abandonBuffer = [&]() {
        buffer->Shutdown();
        delete buffer;
        shutdownUploadContext();
    };

    // --- Staging buffer on the upload heap, same size as the destination. ---
    D3D12_HEAP_PROPERTIES uploadHeapProperties = {};
    uploadHeapProperties.Type = D3D12_HEAP_TYPE_UPLOAD;
    uploadHeapProperties.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN;
    uploadHeapProperties.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN;
    uploadHeapProperties.CreationNodeMask = 0;
    uploadHeapProperties.VisibleNodeMask = 0;

    D3D12_RESOURCE_DESC uploadBufferDesc = {};
    uploadBufferDesc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER;
    uploadBufferDesc.Alignment = 0;
    uploadBufferDesc.Width = desc.size;
    uploadBufferDesc.Height = 1;
    uploadBufferDesc.DepthOrArraySize = 1;
    uploadBufferDesc.MipLevels = 1;
    uploadBufferDesc.Format = DXGI_FORMAT_UNKNOWN;
    uploadBufferDesc.SampleDesc.Count = 1;
    uploadBufferDesc.SampleDesc.Quality = 0;
    uploadBufferDesc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR;
    uploadBufferDesc.Flags = D3D12_RESOURCE_FLAG_NONE;

    ComPtr<ID3D12Resource> uploadBuffer;
    if (FAILED(m_device->CreateCommittedResource(
            &uploadHeapProperties,
            D3D12_HEAP_FLAG_NONE,
            &uploadBufferDesc,
            D3D12_RESOURCE_STATE_GENERIC_READ,
            nullptr,
            IID_PPV_ARGS(&uploadBuffer)))) {
        traceMessage("D3D12 CreateBuffer(initialData) failed stage=create_upload_buffer");
        abandonBuffer();
        return nullptr;
    }
    const uint64_t uploadBufferEndMs = traceLargeStorageBuffer ? GetVolumeTraceSteadyMs() : 0u;

    // --- CPU fill of the staging buffer. ---
    void* mappedData = nullptr;
    // Empty read range: the CPU only writes to this mapping.
    D3D12_RANGE readRange = { 0, 0 };
    if (FAILED(uploadBuffer->Map(0, &readRange, &mappedData)) || mappedData == nullptr) {
        traceMessage("D3D12 CreateBuffer(initialData) failed stage=map_upload_buffer");
        abandonBuffer();
        return nullptr;
    }

    // Write every staging byte exactly once: the payload first, then zeros for
    // the tail only. Clearing the whole buffer before copying would write the
    // payload range twice, which is wasted work on multi-megabyte uploads.
    // initialDataSize <= desc.size <= SIZE_MAX was validated above.
    auto* const mappedBytes = static_cast<unsigned char*>(mappedData);
    const size_t bufferSize = static_cast<size_t>(desc.size);
    std::memcpy(mappedBytes, initialData, initialDataSize);
    if (initialDataSize < bufferSize) {
        std::memset(mappedBytes + initialDataSize, 0, bufferSize - initialDataSize);
    }
    uploadBuffer->Unmap(0, nullptr);
    const uint64_t cpuFillEndMs = traceLargeStorageBuffer ? GetVolumeTraceSteadyMs() : 0u;

    // --- Record the copy and the transition to the requested final state. ---
    ID3D12GraphicsCommandList* const commandList = uploadCommandList.GetCommandList();
    commandList->CopyBufferRegion(
        buffer->GetResource(),
        0,
        uploadBuffer.Get(),
        0,
        desc.size);

    const D3D12_RESOURCE_STATES resolvedFinalState = ToD3D12(finalState);
    if (resolvedFinalState != D3D12_RESOURCE_STATE_COPY_DEST) {
        D3D12_RESOURCE_BARRIER barrier = {};
        barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION;
        barrier.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE;
        barrier.Transition.pResource = buffer->GetResource();
        barrier.Transition.StateBefore = D3D12_RESOURCE_STATE_COPY_DEST;
        barrier.Transition.StateAfter = resolvedFinalState;
        barrier.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES;
        commandList->ResourceBarrier(1, &barrier);
    }
    const uint64_t recordCommandsEndMs = traceLargeStorageBuffer ? GetVolumeTraceSteadyMs() : 0u;

    // --- Submit and wait for the queue to drain. ---
    uploadCommandList.Close();
    ID3D12CommandList* commandLists[] = { commandList };
    uploadQueue.ExecuteCommandListsInternal(1, commandLists);
    const uint64_t submitEndMs = traceLargeStorageBuffer ? GetVolumeTraceSteadyMs() : 0u;
    if (traceLargeStorageBuffer) {
        LogVolumeTraceRendering(
            "D3D12 CreateBuffer(initialData) waiting_for_idle steady_ms=" + std::to_string(submitEndMs) +
            " size_bytes=" + std::to_string(desc.size));
    }

    // The staging buffer and command objects are released when this function
    // returns, so the wait must complete before that happens.
    uploadQueue.WaitForIdle();
    const uint64_t waitEndMs = traceLargeStorageBuffer ? GetVolumeTraceSteadyMs() : 0u;
    buffer->SetState(finalState);

    if (traceLargeStorageBuffer) {
        LogVolumeTraceRendering(
            "D3D12 CreateBuffer(initialData) end steady_ms=" + std::to_string(waitEndMs) +
            " total_ms=" + std::to_string(waitEndMs - traceStartMs) +
            " setup_ms=" + std::to_string(commandSetupEndMs - traceStartMs) +
            " default_buffer_ms=" + std::to_string(defaultBufferEndMs - commandSetupEndMs) +
            " upload_buffer_ms=" + std::to_string(uploadBufferEndMs - defaultBufferEndMs) +
            " cpu_fill_ms=" + std::to_string(cpuFillEndMs - uploadBufferEndMs) +
            " record_ms=" + std::to_string(recordCommandsEndMs - cpuFillEndMs) +
            " submit_ms=" + std::to_string(submitEndMs - recordCommandsEndMs) +
            " wait_ms=" + std::to_string(waitEndMs - submitEndMs));
    }

    shutdownUploadContext();
    return buffer;
}
RHITexture* D3D12Device::CreateTexture(const TextureDesc& desc) {
auto* texture = new D3D12Texture();
D3D12_RESOURCE_DESC d3d12Desc = {};
@@ -1112,6 +1408,19 @@ RHICommandQueue* D3D12Device::CreateCommandQueue(const CommandQueueDesc& desc) {
}
RHIPipelineState* D3D12Device::CreatePipelineState(const GraphicsPipelineDesc& desc) {
const bool traceVolumetricPipeline =
ShouldTraceVolumetricShaderCompile(desc.vertexShader) ||
ShouldTraceVolumetricShaderCompile(desc.fragmentShader) ||
ShouldTraceVolumetricShaderCompile(desc.geometryShader);
const uint64_t pipelineStartMs = traceVolumetricPipeline ? GetVolumeTraceSteadyMs() : 0u;
if (traceVolumetricPipeline) {
LogVolumeTraceRendering(
"D3D12 CreatePipelineState begin steady_ms=" + std::to_string(pipelineStartMs) +
" has_vs=" + std::to_string(HasShaderPayload(desc.vertexShader) ? 1 : 0) +
" has_ps=" + std::to_string(HasShaderPayload(desc.fragmentShader) ? 1 : 0) +
" has_gs=" + std::to_string(HasShaderPayload(desc.geometryShader) ? 1 : 0));
}
auto* pso = new D3D12PipelineState(m_device.Get());
pso->SetInputLayout(desc.inputLayout);
pso->SetRasterizerState(desc.rasterizerState);
@@ -1120,6 +1429,7 @@ RHIPipelineState* D3D12Device::CreatePipelineState(const GraphicsPipelineDesc& d
pso->SetTopology(desc.topologyType);
pso->SetRenderTargetFormats(desc.renderTargetCount, desc.renderTargetFormats, desc.depthStencilFormat);
pso->SetSampleCount(desc.sampleCount);
pso->SetSampleQuality(desc.sampleQuality);
const bool hasVertexShader = HasShaderPayload(desc.vertexShader);
const bool hasFragmentShader = HasShaderPayload(desc.fragmentShader);
@@ -1154,6 +1464,15 @@ RHIPipelineState* D3D12Device::CreatePipelineState(const GraphicsPipelineDesc& d
const bool geometryCompiled = !hasGeometryShader || CompileD3D12Shader(desc.geometryShader, geometryShader);
if (!vertexCompiled || !fragmentCompiled || !geometryCompiled) {
if (traceVolumetricPipeline) {
const uint64_t failureMs = GetVolumeTraceSteadyMs();
LogVolumeTraceRendering(
"D3D12 CreatePipelineState failed steady_ms=" + std::to_string(failureMs) +
" total_ms=" + std::to_string(failureMs - pipelineStartMs) +
" vertex_ok=" + std::to_string(vertexCompiled ? 1 : 0) +
" fragment_ok=" + std::to_string(fragmentCompiled ? 1 : 0) +
" geometry_ok=" + std::to_string(geometryCompiled ? 1 : 0));
}
if (rootSignature != nullptr) {
rootSignature->Shutdown();
delete rootSignature;
@@ -1166,7 +1485,19 @@ RHIPipelineState* D3D12Device::CreatePipelineState(const GraphicsPipelineDesc& d
vertexShader.GetD3D12Bytecode(),
fragmentShader.GetD3D12Bytecode(),
hasGeometryShader ? geometryShader.GetD3D12Bytecode() : D3D12_SHADER_BYTECODE{});
const uint64_t finalizeStartMs = traceVolumetricPipeline ? GetVolumeTraceSteadyMs() : 0u;
if (traceVolumetricPipeline) {
LogVolumeTraceRendering(
"D3D12 CreatePipelineState finalize begin steady_ms=" + std::to_string(finalizeStartMs));
}
pso->EnsureValid();
if (traceVolumetricPipeline) {
const uint64_t finalizeEndMs = GetVolumeTraceSteadyMs();
LogVolumeTraceRendering(
"D3D12 CreatePipelineState finalize end steady_ms=" + std::to_string(finalizeEndMs) +
" total_ms=" + std::to_string(finalizeEndMs - finalizeStartMs) +
" valid=" + std::to_string(pso->IsValid() ? 1 : 0));
}
if (rootSignature != nullptr) {
rootSignature->Shutdown();
@@ -1174,10 +1505,22 @@ RHIPipelineState* D3D12Device::CreatePipelineState(const GraphicsPipelineDesc& d
}
if (!pso->IsValid()) {
if (traceVolumetricPipeline) {
const uint64_t failureMs = GetVolumeTraceSteadyMs();
LogVolumeTraceRendering(
"D3D12 CreatePipelineState invalid steady_ms=" + std::to_string(failureMs) +
" total_ms=" + std::to_string(failureMs - pipelineStartMs));
}
delete pso;
return nullptr;
}
if (traceVolumetricPipeline) {
const uint64_t pipelineEndMs = GetVolumeTraceSteadyMs();
LogVolumeTraceRendering(
"D3D12 CreatePipelineState end steady_ms=" + std::to_string(pipelineEndMs) +
" total_ms=" + std::to_string(pipelineEndMs - pipelineStartMs));
}
return pso;
}