#include "XC3DGSD3D12/App.h" #include #include #include #include #include #include #include #include #include #include "XCEngine/RHI/D3D12/D3D12Screenshot.h" #include "XCEngine/RHI/RHIDescriptorPool.h" #include "XCEngine/RHI/RHIDescriptorSet.h" #include "XCEngine/RHI/RHIPipelineLayout.h" #include "XCEngine/RHI/RHIPipelineState.h" namespace XC3DGSD3D12 { using namespace XCEngine::RHI; namespace { constexpr wchar_t kWindowClassName[] = L"XC3DGSD3D12WindowClass"; constexpr wchar_t kWindowTitle[] = L"XC 3DGS D3D12 MVS - Phase 3"; constexpr float kClearColor[4] = { 0.04f, 0.05f, 0.07f, 1.0f }; constexpr uint32_t kPrepareThreadGroupSize = 64u; constexpr uint32_t kSortThreadGroupSize = 64u; constexpr uint32_t kDeviceRadixSortPartitionSize = 3840u; constexpr uint32_t kDeviceRadixSortRadix = 256u; constexpr uint32_t kDeviceRadixSortPassCount = 4u; constexpr bool kUseCpuSortBaseline = true; struct FrameConstants { float viewProjection[16] = {}; float view[16] = {}; float projection[16] = {}; float cameraWorldPos[4] = {}; float screenParams[4] = {}; float settings[4] = {}; }; struct RadixSortConstants { uint32_t numKeys = 0; uint32_t radixShift = 0; uint32_t threadBlocks = 0; uint32_t padding = 0; }; static_assert(sizeof(FrameConstants) % 16 == 0, "Frame constants must stay 16-byte aligned."); static_assert(sizeof(RadixSortConstants) % 16 == 0, "Radix sort constants must stay 16-byte aligned."); static_assert(sizeof(PreparedSplatView) == 40, "Prepared view buffer layout must match shader."); std::filesystem::path GetExecutableDirectory() { std::wstring pathBuffer; pathBuffer.resize(MAX_PATH); const DWORD pathLength = GetModuleFileNameW(nullptr, pathBuffer.data(), static_cast(pathBuffer.size())); pathBuffer.resize(pathLength); return std::filesystem::path(pathBuffer).parent_path(); } std::filesystem::path ResolveNearExecutable(const std::wstring& path) { const std::filesystem::path inputPath(path); if (inputPath.is_absolute()) { return inputPath; } return GetExecutableDirectory() / inputPath; } std::filesystem::path ResolveShaderPath(std::wstring_view fileName) { return GetExecutableDirectory() / L"shaders" / std::filesystem::path(fileName); } std::vector LoadBinaryFile(const std::filesystem::path& filePath) { std::ifstream input(filePath, std::ios::binary); if (!input.is_open()) { return {}; } input.seekg(0, std::ios::end); const std::streamoff size = input.tellg(); if (size <= 0) { return {}; } input.seekg(0, std::ios::beg); std::vector bytes(static_cast(size)); input.read(reinterpret_cast(bytes.data()), size); if (!input) { return {}; } return bytes; } std::string NarrowAscii(std::wstring_view text) { std::string result; result.reserve(text.size()); for (wchar_t ch : text) { result.push_back(ch >= 0 && ch <= 0x7F ? static_cast(ch) : '?'); } return result; } void AppendTrace(std::string_view message) { const std::filesystem::path tracePath = GetExecutableDirectory() / "phase3_trace.log"; std::ofstream file(tracePath, std::ios::app); if (!file.is_open()) { return; } file << GetTickCount64() << " | " << message << '\n'; } void AppendTrace(const std::wstring& message) { AppendTrace(NarrowAscii(message)); } ShaderCompileDesc BuildDxilShaderDesc(const std::filesystem::path& compiledShaderPath, const std::wstring& profile) { ShaderCompileDesc shaderDesc = {}; shaderDesc.profile = profile; shaderDesc.compiledBinaryBackend = ShaderBinaryBackend::D3D12; shaderDesc.compiledBinary = LoadBinaryFile(compiledShaderPath); return shaderDesc; } void StoreMatrixTransposed(const DirectX::XMMATRIX& matrix, float* destination) { DirectX::XMFLOAT4X4 output = {}; DirectX::XMStoreFloat4x4(&output, DirectX::XMMatrixTranspose(matrix)); std::memcpy(destination, &output, sizeof(output)); } DirectX::XMMATRIX BuildSortViewMatrix() { using namespace DirectX; const XMVECTOR eye = XMVectorSet(0.0f, 0.5f, 1.0f, 1.0f); const XMVECTOR target = XMVectorSet(3.0f, 0.5f, 6.196152f, 1.0f); const XMVECTOR up = XMVectorSet(0.0f, 1.0f, 0.0f, 0.0f); return XMMatrixLookAtRH(eye, target, up); } uint32_t FloatToSortableUint(float value) { const uint32_t bits = std::bit_cast(value); const uint32_t mask = (0u - (bits >> 31)) | 0x80000000u; return bits ^ mask; } void BuildCpuSortedOrder( const GaussianSplatRuntimeData& sceneData, std::vector& outOrder, std::vector* outSortedKeys = nullptr) { using namespace DirectX; const XMMATRIX view = BuildSortViewMatrix(); const float* positionBytes = reinterpret_cast(sceneData.positionData.data()); std::vector> sortablePairs(sceneData.splatCount); for (uint32_t index = 0; index < sceneData.splatCount; ++index) { const float* position = positionBytes + index * 3u; const XMVECTOR worldPosition = XMVectorSet(position[0], position[1], position[2], 1.0f); const XMVECTOR viewPosition = XMVector4Transform(worldPosition, view); sortablePairs[index] = { FloatToSortableUint(XMVectorGetZ(viewPosition)), index, }; } std::stable_sort( sortablePairs.begin(), sortablePairs.end(), [](const auto& left, const auto& right) { return left.first < right.first; }); outOrder.resize(sortablePairs.size()); if (outSortedKeys != nullptr) { outSortedKeys->resize(sortablePairs.size()); } for (size_t index = 0; index < sortablePairs.size(); ++index) { outOrder[index] = sortablePairs[index].second; if (outSortedKeys != nullptr) { (*outSortedKeys)[index] = sortablePairs[index].first; } } } FrameConstants BuildFrameConstants(uint32_t width, uint32_t height, uint32_t splatCount) { using namespace DirectX; const float aspect = height > 0 ? static_cast(width) / static_cast(height) : 1.0f; const XMMATRIX view = BuildSortViewMatrix(); const XMMATRIX projection = XMMatrixPerspectiveFovRH(XMConvertToRadians(60.0f), aspect, 0.1f, 200.0f); const XMMATRIX viewProjection = XMMatrixMultiply(view, projection); FrameConstants constants = {}; StoreMatrixTransposed(viewProjection, constants.viewProjection); StoreMatrixTransposed(view, constants.view); StoreMatrixTransposed(projection, constants.projection); constants.cameraWorldPos[0] = 0.0f; constants.cameraWorldPos[1] = 0.5f; constants.cameraWorldPos[2] = 1.0f; constants.cameraWorldPos[3] = 1.0f; constants.screenParams[0] = static_cast(width); constants.screenParams[1] = static_cast(height); constants.screenParams[2] = width > 0 ? 1.0f / static_cast(width) : 0.0f; constants.screenParams[3] = height > 0 ? 1.0f / static_cast(height) : 0.0f; constants.settings[0] = static_cast(splatCount); constants.settings[1] = 1.0f; // opacity scale constants.settings[2] = 3.0f; // SH order constants.settings[3] = 0.9f; return constants; } template void ShutdownAndDelete(T*& object) { if (object != nullptr) { object->Shutdown(); delete object; object = nullptr; } } } // namespace App::App() = default; App::~App() { Shutdown(); } bool App::Initialize(HINSTANCE instance, int showCommand) { AppendTrace("Initialize: begin"); m_instance = instance; m_lastErrorMessage.clear(); AppendTrace("Initialize: LoadGaussianScene"); if (!LoadGaussianScene()) { AppendTrace(std::string("Initialize: LoadGaussianScene failed: ") + NarrowAscii(m_lastErrorMessage)); return false; } AppendTrace("Initialize: RegisterWindowClass"); if (!RegisterWindowClass(instance)) { m_lastErrorMessage = L"Failed to register the Win32 window class."; AppendTrace(std::string("Initialize: RegisterWindowClass failed: ") + NarrowAscii(m_lastErrorMessage)); return false; } AppendTrace("Initialize: CreateMainWindow"); if (!CreateMainWindow(instance, showCommand)) { m_lastErrorMessage = L"Failed to create the main window."; AppendTrace(std::string("Initialize: CreateMainWindow failed: ") + NarrowAscii(m_lastErrorMessage)); return false; } AppendTrace("Initialize: InitializeRhi"); if (!InitializeRhi()) { if (m_lastErrorMessage.empty()) { m_lastErrorMessage = L"Failed to initialize the D3D12 RHI objects."; } AppendTrace(std::string("Initialize: InitializeRhi failed: ") + NarrowAscii(m_lastErrorMessage)); return false; } AppendTrace("Initialize: InitializeGaussianGpuResources"); if (!InitializeGaussianGpuResources()) { AppendTrace(std::string("Initialize: InitializeGaussianGpuResources failed: ") + NarrowAscii(m_lastErrorMessage)); return false; } AppendTrace("Initialize: InitializePreparePassResources"); if (!InitializePreparePassResources()) { AppendTrace(std::string("Initialize: InitializePreparePassResources failed: ") + NarrowAscii(m_lastErrorMessage)); return false; } AppendTrace("Initialize: InitializeSortResources"); if (!InitializeSortResources()) { AppendTrace(std::string("Initialize: InitializeSortResources failed: ") + NarrowAscii(m_lastErrorMessage)); return false; } AppendTrace("Initialize: InitializeDebugDrawResources"); if (!InitializeDebugDrawResources()) { AppendTrace(std::string("Initialize: InitializeDebugDrawResources failed: ") + NarrowAscii(m_lastErrorMessage)); return false; } m_isInitialized = true; m_running = true; AppendTrace("Initialize: success"); return true; } void App::SetFrameLimit(unsigned int frameLimit) { m_frameLimit = frameLimit; } void App::SetGaussianScenePath(std::wstring scenePath) { m_gaussianScenePath = std::move(scenePath); } void App::SetSummaryPath(std::wstring summaryPath) { m_summaryPath = std::move(summaryPath); } void App::SetScreenshotPath(std::wstring screenshotPath) { m_screenshotPath = std::move(screenshotPath); } const std::wstring& App::GetLastErrorMessage() const { return m_lastErrorMessage; } int App::Run() { AppendTrace("Run: begin"); MSG message = {}; int exitCode = 0; while (m_running) { while (PeekMessage(&message, nullptr, 0, 0, PM_REMOVE)) { if (message.message == WM_QUIT) { exitCode = static_cast(message.wParam); m_running = false; break; } TranslateMessage(&message); DispatchMessage(&message); } if (!m_running) { break; } const bool captureScreenshot = m_frameLimit > 0 && (m_renderedFrameCount + 1) >= m_frameLimit; AppendTrace(captureScreenshot ? "Run: RenderFrame capture" : "Run: RenderFrame"); RenderFrame(captureScreenshot); AppendTrace("Run: RenderFrame complete"); ++m_renderedFrameCount; if (m_frameLimit > 0 && m_renderedFrameCount >= m_frameLimit) { m_running = false; AppendTrace("Run: frame limit reached, posting WM_CLOSE"); PostMessageW(m_hwnd, WM_CLOSE, 0, 0); } } AppendTrace("Run: end"); return exitCode; } LRESULT CALLBACK App::StaticWindowProc(HWND hwnd, UINT message, WPARAM wParam, LPARAM lParam) { App* app = nullptr; if (message == WM_NCCREATE) { CREATESTRUCTW* createStruct = reinterpret_cast(lParam); app = reinterpret_cast(createStruct->lpCreateParams); SetWindowLongPtrW(hwnd, GWLP_USERDATA, reinterpret_cast(app)); } else { app = reinterpret_cast(GetWindowLongPtrW(hwnd, GWLP_USERDATA)); } if (app != nullptr) { return app->WindowProc(hwnd, message, wParam, lParam); } return DefWindowProcW(hwnd, message, wParam, lParam); } LRESULT App::WindowProc(HWND hwnd, UINT message, WPARAM wParam, LPARAM lParam) { switch (message) { case WM_CLOSE: DestroyWindow(hwnd); return 0; case WM_DESTROY: PostQuitMessage(0); return 0; default: return DefWindowProcW(hwnd, message, wParam, lParam); } } bool App::RegisterWindowClass(HINSTANCE instance) { WNDCLASSEXW windowClass = {}; windowClass.cbSize = sizeof(WNDCLASSEXW); windowClass.style = CS_HREDRAW | CS_VREDRAW; windowClass.lpfnWndProc = &App::StaticWindowProc; windowClass.hInstance = instance; windowClass.hCursor = LoadCursorW(nullptr, IDC_ARROW); windowClass.lpszClassName = kWindowClassName; return RegisterClassExW(&windowClass) != 0; } bool App::CreateMainWindow(HINSTANCE instance, int showCommand) { RECT windowRect = { 0, 0, m_width, m_height }; AdjustWindowRect(&windowRect, WS_OVERLAPPEDWINDOW, FALSE); m_hwnd = CreateWindowExW( 0, kWindowClassName, kWindowTitle, WS_OVERLAPPEDWINDOW, CW_USEDEFAULT, CW_USEDEFAULT, windowRect.right - windowRect.left, windowRect.bottom - windowRect.top, nullptr, nullptr, instance, this); if (m_hwnd == nullptr) { return false; } ShowWindow(m_hwnd, showCommand); UpdateWindow(m_hwnd); return true; } bool App::LoadGaussianScene() { std::string errorMessage; const std::filesystem::path scenePath = ResolveNearExecutable(m_gaussianScenePath); if (!LoadGaussianSceneFromPly(scenePath, m_gaussianSceneData, errorMessage)) { m_lastErrorMessage.assign(errorMessage.begin(), errorMessage.end()); return false; } if (!m_summaryPath.empty()) { const std::filesystem::path summaryPath = ResolveNearExecutable(m_summaryPath); if (!WriteGaussianSceneSummary(summaryPath, m_gaussianSceneData, errorMessage)) { m_lastErrorMessage.assign(errorMessage.begin(), errorMessage.end()); return false; } } return true; } bool App::InitializeRhi() { RHIDeviceDesc deviceDesc = {}; deviceDesc.adapterIndex = 0; deviceDesc.enableDebugLayer = false; deviceDesc.enableGPUValidation = false; if (!m_device.Initialize(deviceDesc)) { m_lastErrorMessage = L"Failed to initialize the XCEngine D3D12 device."; return false; } ID3D12Device* device = m_device.GetDevice(); IDXGIFactory4* factory = m_device.GetFactory(); D3D12_FEATURE_DATA_D3D12_OPTIONS1 options1 = {}; if (SUCCEEDED(device->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS1, &options1, sizeof(options1)))) { AppendTrace( "InitializeRhi: wave ops min=" + std::to_string(options1.WaveLaneCountMin) + " max=" + std::to_string(options1.WaveLaneCountMax) + " total_lane_count=" + std::to_string(options1.TotalLaneCount)); } if (!m_commandQueue.Initialize(device, CommandQueueType::Direct)) { m_lastErrorMessage = L"Failed to initialize the direct command queue."; return false; } if (!m_swapChain.Initialize(factory, m_commandQueue.GetCommandQueue(), m_hwnd, m_width, m_height, kBackBufferCount)) { m_lastErrorMessage = L"Failed to initialize the swap chain."; return false; } if (!m_depthStencil.InitializeDepthStencil(device, m_width, m_height)) { m_lastErrorMessage = L"Failed to initialize the depth stencil texture."; return false; } if (!m_rtvHeap.Initialize(device, DescriptorHeapType::RTV, kBackBufferCount)) { m_lastErrorMessage = L"Failed to initialize the RTV descriptor heap."; return false; } if (!m_dsvHeap.Initialize(device, DescriptorHeapType::DSV, 1)) { m_lastErrorMessage = L"Failed to initialize the DSV descriptor heap."; return false; } for (int index = 0; index < kBackBufferCount; ++index) { D3D12Texture& backBuffer = m_swapChain.GetBackBuffer(index); D3D12_RENDER_TARGET_VIEW_DESC renderTargetDesc = D3D12ResourceView::CreateRenderTargetDesc(Format::R8G8B8A8_UNorm, D3D12_RTV_DIMENSION_TEXTURE2D); m_rtvs[index].InitializeAsRenderTarget(device, backBuffer.GetResource(), &renderTargetDesc, &m_rtvHeap, index); } D3D12_DEPTH_STENCIL_VIEW_DESC depthStencilDesc = D3D12ResourceView::CreateDepthStencilDesc(Format::D24_UNorm_S8_UInt, D3D12_DSV_DIMENSION_TEXTURE2D); m_dsv.InitializeAsDepthStencil(device, m_depthStencil.GetResource(), &depthStencilDesc, &m_dsvHeap, 0); if (!m_commandAllocator.Initialize(device, CommandQueueType::Direct)) { m_lastErrorMessage = L"Failed to initialize the command allocator."; return false; } if (!m_commandList.Initialize(device, CommandQueueType::Direct, m_commandAllocator.GetCommandAllocator())) { m_lastErrorMessage = L"Failed to initialize the command list."; return false; } return true; } bool App::InitializeGaussianGpuResources() { ID3D12Device* device = m_device.GetDevice(); ID3D12GraphicsCommandList* commandList = m_commandList.GetCommandList(); m_commandAllocator.Reset(); m_commandList.Reset(); const D3D12_RESOURCE_STATES shaderResourceState = D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE | D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE; m_gaussianPositionBuffer.SetStride(4); if (!m_gaussianPositionBuffer.InitializeWithData( device, commandList, m_gaussianSceneData.positionData.data(), static_cast(m_gaussianSceneData.positionData.size()), shaderResourceState)) { m_lastErrorMessage = L"Failed to upload the Gaussian position buffer."; return false; } m_gaussianOtherBuffer.SetStride(4); if (!m_gaussianOtherBuffer.InitializeWithData( device, commandList, m_gaussianSceneData.otherData.data(), static_cast(m_gaussianSceneData.otherData.size()), shaderResourceState)) { m_lastErrorMessage = L"Failed to upload the Gaussian other buffer."; return false; } m_gaussianShBuffer.SetStride(4); if (!m_gaussianShBuffer.InitializeWithData( device, commandList, m_gaussianSceneData.shData.data(), static_cast(m_gaussianSceneData.shData.size()), shaderResourceState)) { m_lastErrorMessage = L"Failed to upload the Gaussian SH buffer."; return false; } D3D12_RESOURCE_DESC colorTextureDesc = {}; colorTextureDesc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D; colorTextureDesc.Alignment = 0; colorTextureDesc.Width = m_gaussianSceneData.colorTextureWidth; colorTextureDesc.Height = m_gaussianSceneData.colorTextureHeight; colorTextureDesc.DepthOrArraySize = 1; colorTextureDesc.MipLevels = 1; colorTextureDesc.Format = DXGI_FORMAT_R32G32B32A32_FLOAT; colorTextureDesc.SampleDesc.Count = 1; colorTextureDesc.SampleDesc.Quality = 0; colorTextureDesc.Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN; colorTextureDesc.Flags = D3D12_RESOURCE_FLAG_NONE; Microsoft::WRL::ComPtr colorUploadBuffer; if (!m_gaussianColorTexture.InitializeFromData( device, commandList, colorTextureDesc, TextureType::Texture2D, m_gaussianSceneData.colorData.data(), m_gaussianSceneData.colorData.size(), m_gaussianSceneData.colorTextureWidth * GaussianSplatRuntimeData::kColorStride, &colorUploadBuffer)) { m_lastErrorMessage = L"Failed to upload the Gaussian color texture."; return false; } m_gaussianUploadBuffers.push_back(colorUploadBuffer); ResourceViewDesc rawBufferViewDesc = {}; rawBufferViewDesc.dimension = ResourceViewDimension::RawBuffer; m_gaussianPositionView.reset(static_cast(m_device.CreateShaderResourceView(&m_gaussianPositionBuffer, rawBufferViewDesc))); if (!m_gaussianPositionView) { m_lastErrorMessage = L"Failed to create the Gaussian position SRV."; return false; } m_gaussianOtherView.reset(static_cast(m_device.CreateShaderResourceView(&m_gaussianOtherBuffer, rawBufferViewDesc))); if (!m_gaussianOtherView) { m_lastErrorMessage = L"Failed to create the Gaussian other SRV."; return false; } m_gaussianShView.reset(static_cast(m_device.CreateShaderResourceView(&m_gaussianShBuffer, rawBufferViewDesc))); if (!m_gaussianShView) { m_lastErrorMessage = L"Failed to create the Gaussian SH SRV."; return false; } ResourceViewDesc textureViewDesc = {}; textureViewDesc.dimension = ResourceViewDimension::Texture2D; textureViewDesc.format = static_cast(Format::R32G32B32A32_Float); m_gaussianColorView.reset(static_cast(m_device.CreateShaderResourceView(&m_gaussianColorTexture, textureViewDesc))); if (!m_gaussianColorView) { m_lastErrorMessage = L"Failed to create the Gaussian color texture SRV."; return false; } m_commandList.Close(); void* commandLists[] = { &m_commandList }; m_commandQueue.ExecuteCommandLists(1, commandLists); m_commandQueue.WaitForIdle(); m_gaussianUploadBuffers.clear(); return true; } bool App::InitializePreparePassResources() { DescriptorSetLayoutBinding bindings[6] = {}; bindings[0].binding = 0; bindings[0].type = static_cast(DescriptorType::CBV); bindings[0].count = 1; bindings[0].visibility = static_cast(ShaderVisibility::All); bindings[1].binding = 1; bindings[1].type = static_cast(DescriptorType::SRV); bindings[1].count = 1; bindings[1].visibility = static_cast(ShaderVisibility::All); bindings[1].resourceDimension = ResourceViewDimension::RawBuffer; bindings[2].binding = 2; bindings[2].type = static_cast(DescriptorType::SRV); bindings[2].count = 1; bindings[2].visibility = static_cast(ShaderVisibility::All); bindings[2].resourceDimension = ResourceViewDimension::RawBuffer; bindings[3].binding = 3; bindings[3].type = static_cast(DescriptorType::SRV); bindings[3].count = 1; bindings[3].visibility = static_cast(ShaderVisibility::All); bindings[3].resourceDimension = ResourceViewDimension::Texture2D; bindings[4].binding = 4; bindings[4].type = static_cast(DescriptorType::SRV); bindings[4].count = 1; bindings[4].visibility = static_cast(ShaderVisibility::All); bindings[4].resourceDimension = ResourceViewDimension::RawBuffer; bindings[5].binding = 5; bindings[5].type = static_cast(DescriptorType::UAV); bindings[5].count = 1; bindings[5].visibility = static_cast(ShaderVisibility::All); bindings[5].resourceDimension = ResourceViewDimension::StructuredBuffer; DescriptorSetLayoutDesc setLayout = {}; setLayout.bindings = bindings; setLayout.bindingCount = 6; RHIPipelineLayoutDesc pipelineLayoutDesc = {}; pipelineLayoutDesc.setLayouts = &setLayout; pipelineLayoutDesc.setLayoutCount = 1; m_preparePipelineLayout = m_device.CreatePipelineLayout(pipelineLayoutDesc); if (m_preparePipelineLayout == nullptr) { m_lastErrorMessage = L"Failed to create the prepare pass pipeline layout."; return false; } BufferDesc preparedViewBufferDesc = {}; preparedViewBufferDesc.size = static_cast(sizeof(PreparedSplatView)) * m_gaussianSceneData.splatCount; preparedViewBufferDesc.stride = sizeof(PreparedSplatView); preparedViewBufferDesc.bufferType = static_cast(BufferType::Storage); preparedViewBufferDesc.flags = static_cast(BufferFlags::AllowUnorderedAccess); m_preparedViewBuffer = static_cast(m_device.CreateBuffer(preparedViewBufferDesc)); if (m_preparedViewBuffer == nullptr) { m_lastErrorMessage = L"Failed to create the prepared view buffer."; return false; } m_preparedViewBuffer->SetStride(sizeof(PreparedSplatView)); m_preparedViewBuffer->SetBufferType(BufferType::Storage); ResourceViewDesc structuredViewDesc = {}; structuredViewDesc.dimension = ResourceViewDimension::StructuredBuffer; structuredViewDesc.structureByteStride = sizeof(PreparedSplatView); structuredViewDesc.elementCount = m_gaussianSceneData.splatCount; m_preparedViewSrv.reset(static_cast(m_device.CreateShaderResourceView(m_preparedViewBuffer, structuredViewDesc))); if (!m_preparedViewSrv) { m_lastErrorMessage = L"Failed to create the prepared view SRV."; return false; } m_preparedViewUav.reset(static_cast(m_device.CreateUnorderedAccessView(m_preparedViewBuffer, structuredViewDesc))); if (!m_preparedViewUav) { m_lastErrorMessage = L"Failed to create the prepared view UAV."; return false; } DescriptorPoolDesc poolDesc = {}; poolDesc.type = DescriptorHeapType::CBV_SRV_UAV; poolDesc.descriptorCount = 5; poolDesc.shaderVisible = true; m_prepareDescriptorPool = m_device.CreateDescriptorPool(poolDesc); if (m_prepareDescriptorPool == nullptr) { m_lastErrorMessage = L"Failed to create the prepare pass descriptor pool."; return false; } m_prepareDescriptorSet = m_prepareDescriptorPool->AllocateSet(setLayout); if (m_prepareDescriptorSet == nullptr) { m_lastErrorMessage = L"Failed to allocate the prepare pass descriptor set."; return false; } m_prepareDescriptorSet->Update(1, m_gaussianPositionView.get()); m_prepareDescriptorSet->Update(2, m_gaussianOtherView.get()); m_prepareDescriptorSet->Update(3, m_gaussianColorView.get()); m_prepareDescriptorSet->Update(4, m_gaussianShView.get()); m_prepareDescriptorSet->Update(5, m_preparedViewUav.get()); ShaderCompileDesc computeShaderDesc = {}; computeShaderDesc.fileName = ResolveShaderPath(L"PrepareGaussiansCS.hlsl").wstring(); computeShaderDesc.entryPoint = L"MainCS"; computeShaderDesc.profile = L"cs_5_0"; ComputePipelineDesc pipelineDesc = {}; pipelineDesc.pipelineLayout = m_preparePipelineLayout; pipelineDesc.computeShader = computeShaderDesc; m_preparePipelineState = m_device.CreateComputePipelineState(pipelineDesc); if (m_preparePipelineState == nullptr) { m_lastErrorMessage = L"Failed to create the prepare pass pipeline state."; return false; } return true; } bool App::InitializeSortResources() { std::vector initialOrder; if (kUseCpuSortBaseline) { BuildCpuSortedOrder(m_gaussianSceneData, initialOrder, nullptr); } else { initialOrder.resize(static_cast(m_gaussianSceneData.splatCount)); for (uint32_t index = 0; index < m_gaussianSceneData.splatCount; ++index) { initialOrder[index] = index; } } const uint64_t sortBufferBytes = static_cast(m_gaussianSceneData.splatCount) * sizeof(uint32_t); const uint32_t threadBlocks = static_cast((m_gaussianSceneData.splatCount + (kDeviceRadixSortPartitionSize - 1u)) / kDeviceRadixSortPartitionSize); const uint64_t passHistogramElements = static_cast(threadBlocks) * kDeviceRadixSortRadix; const uint64_t globalHistogramElements = static_cast(kDeviceRadixSortRadix) * kDeviceRadixSortPassCount; auto initializeStorageBuffer = [this](D3D12Buffer*& buffer, uint64_t sizeInBytes) -> bool { BufferDesc bufferDesc = {}; bufferDesc.size = sizeInBytes; bufferDesc.stride = sizeof(uint32_t); bufferDesc.bufferType = static_cast(BufferType::Storage); bufferDesc.flags = static_cast(BufferFlags::AllowUnorderedAccess); buffer = static_cast(m_device.CreateBuffer(bufferDesc)); if (buffer == nullptr) { return false; } buffer->SetStride(sizeof(uint32_t)); buffer->SetBufferType(BufferType::Storage); return true; }; BufferDesc orderBufferDesc = {}; orderBufferDesc.size = sortBufferBytes; orderBufferDesc.stride = sizeof(uint32_t); orderBufferDesc.bufferType = static_cast(BufferType::Storage); orderBufferDesc.flags = static_cast(BufferFlags::AllowUnorderedAccess); m_orderBuffer = static_cast(m_device.CreateBuffer( orderBufferDesc, initialOrder.data(), static_cast(sortBufferBytes), ResourceStates::NonPixelShaderResource)); if (m_orderBuffer == nullptr) { m_lastErrorMessage = L"Failed to create the primary order buffer."; return false; } m_orderBuffer->SetStride(sizeof(uint32_t)); m_orderBuffer->SetBufferType(BufferType::Storage); if (!initializeStorageBuffer(m_orderScratchBuffer, sortBufferBytes)) { m_lastErrorMessage = L"Failed to create the scratch order buffer."; return false; } if (!initializeStorageBuffer(m_sortKeyBuffer, sortBufferBytes)) { m_lastErrorMessage = L"Failed to create the primary sort key buffer."; return false; } if (!initializeStorageBuffer(m_sortKeyScratchBuffer, sortBufferBytes)) { m_lastErrorMessage = L"Failed to create the scratch sort key buffer."; return false; } if (!initializeStorageBuffer(m_passHistogramBuffer, passHistogramElements * sizeof(uint32_t))) { m_lastErrorMessage = L"Failed to create the pass histogram buffer."; return false; } if (!initializeStorageBuffer(m_globalHistogramBuffer, globalHistogramElements * sizeof(uint32_t))) { m_lastErrorMessage = L"Failed to create the global histogram buffer."; return false; } ResourceViewDesc structuredViewDesc = {}; structuredViewDesc.dimension = ResourceViewDimension::StructuredBuffer; structuredViewDesc.structureByteStride = sizeof(uint32_t); structuredViewDesc.elementCount = m_gaussianSceneData.splatCount; ResourceViewDesc passHistogramViewDesc = structuredViewDesc; passHistogramViewDesc.elementCount = static_cast(passHistogramElements); ResourceViewDesc globalHistogramViewDesc = structuredViewDesc; globalHistogramViewDesc.elementCount = static_cast(globalHistogramElements); m_orderBufferSrv.reset(static_cast(m_device.CreateShaderResourceView(m_orderBuffer, structuredViewDesc))); if (!m_orderBufferSrv) { m_lastErrorMessage = L"Failed to create the primary order SRV."; return false; } m_orderBufferUav.reset(static_cast(m_device.CreateUnorderedAccessView(m_orderBuffer, structuredViewDesc))); if (!m_orderBufferUav) { m_lastErrorMessage = L"Failed to create the primary order UAV."; return false; } m_orderScratchUav.reset(static_cast(m_device.CreateUnorderedAccessView(m_orderScratchBuffer, structuredViewDesc))); if (!m_orderScratchUav) { m_lastErrorMessage = L"Failed to create the scratch order UAV."; return false; } m_sortKeyUav.reset(static_cast(m_device.CreateUnorderedAccessView(m_sortKeyBuffer, structuredViewDesc))); if (!m_sortKeyUav) { m_lastErrorMessage = L"Failed to create the primary sort key UAV."; return false; } m_sortKeyScratchUav.reset(static_cast(m_device.CreateUnorderedAccessView(m_sortKeyScratchBuffer, structuredViewDesc))); if (!m_sortKeyScratchUav) { m_lastErrorMessage = L"Failed to create the scratch sort key UAV."; return false; } m_passHistogramUav.reset(static_cast(m_device.CreateUnorderedAccessView(m_passHistogramBuffer, passHistogramViewDesc))); if (!m_passHistogramUav) { m_lastErrorMessage = L"Failed to create the pass histogram UAV."; return false; } m_globalHistogramUav.reset(static_cast(m_device.CreateUnorderedAccessView(m_globalHistogramBuffer, globalHistogramViewDesc))); if (!m_globalHistogramUav) { m_lastErrorMessage = L"Failed to create the global histogram UAV."; return false; } DescriptorSetLayoutBinding buildSortKeyBindings[4] = {}; buildSortKeyBindings[0].binding = 0; buildSortKeyBindings[0].type = static_cast(DescriptorType::CBV); buildSortKeyBindings[0].count = 1; buildSortKeyBindings[0].visibility = static_cast(ShaderVisibility::All); buildSortKeyBindings[1].binding = 1; buildSortKeyBindings[1].type = static_cast(DescriptorType::SRV); buildSortKeyBindings[1].count = 1; buildSortKeyBindings[1].visibility = static_cast(ShaderVisibility::All); buildSortKeyBindings[1].resourceDimension = ResourceViewDimension::RawBuffer; buildSortKeyBindings[2].binding = 2; buildSortKeyBindings[2].type = static_cast(DescriptorType::SRV); buildSortKeyBindings[2].count = 1; buildSortKeyBindings[2].visibility = static_cast(ShaderVisibility::All); buildSortKeyBindings[2].resourceDimension = ResourceViewDimension::StructuredBuffer; buildSortKeyBindings[3].binding = 3; buildSortKeyBindings[3].type = static_cast(DescriptorType::UAV); buildSortKeyBindings[3].count = 1; buildSortKeyBindings[3].visibility = static_cast(ShaderVisibility::All); buildSortKeyBindings[3].resourceDimension = ResourceViewDimension::StructuredBuffer; DescriptorSetLayoutDesc buildSortKeySetLayout = {}; buildSortKeySetLayout.bindings = buildSortKeyBindings; buildSortKeySetLayout.bindingCount = 4; RHIPipelineLayoutDesc buildSortKeyLayoutDesc = {}; buildSortKeyLayoutDesc.setLayouts = &buildSortKeySetLayout; buildSortKeyLayoutDesc.setLayoutCount = 1; m_buildSortKeyPipelineLayout = m_device.CreatePipelineLayout(buildSortKeyLayoutDesc); if (m_buildSortKeyPipelineLayout == nullptr) { m_lastErrorMessage = L"Failed to create the build-sort-key pipeline layout."; return false; } DescriptorPoolDesc buildSortKeyPoolDesc = {}; buildSortKeyPoolDesc.type = DescriptorHeapType::CBV_SRV_UAV; buildSortKeyPoolDesc.descriptorCount = 3; buildSortKeyPoolDesc.shaderVisible = true; m_buildSortKeyDescriptorPool = m_device.CreateDescriptorPool(buildSortKeyPoolDesc); if (m_buildSortKeyDescriptorPool == nullptr) { m_lastErrorMessage = L"Failed to create the build-sort-key descriptor pool."; return false; } m_buildSortKeyDescriptorSet = m_buildSortKeyDescriptorPool->AllocateSet(buildSortKeySetLayout); if (m_buildSortKeyDescriptorSet == nullptr) { m_lastErrorMessage = L"Failed to allocate the build-sort-key descriptor set."; return false; } m_buildSortKeyDescriptorSet->Update(1, m_gaussianPositionView.get()); m_buildSortKeyDescriptorSet->Update(2, m_orderBufferSrv.get()); m_buildSortKeyDescriptorSet->Update(3, m_sortKeyUav.get()); ShaderCompileDesc buildSortKeyShaderDesc = BuildDxilShaderDesc(ResolveShaderPath(L"BuildSortKeysCS.dxil"), L"cs_6_6"); if (buildSortKeyShaderDesc.compiledBinary.empty()) { m_lastErrorMessage = L"Failed to load BuildSortKeysCS.dxil."; return false; } ComputePipelineDesc buildSortKeyPipelineDesc = {}; buildSortKeyPipelineDesc.pipelineLayout = m_buildSortKeyPipelineLayout; buildSortKeyPipelineDesc.computeShader = buildSortKeyShaderDesc; m_buildSortKeyPipelineState = m_device.CreateComputePipelineState(buildSortKeyPipelineDesc); if (m_buildSortKeyPipelineState == nullptr) { m_lastErrorMessage = L"Failed to create the build-sort-key pipeline state."; return false; } DescriptorSetLayoutBinding radixSortBindings[7] = {}; radixSortBindings[0].binding = 0; radixSortBindings[0].type = static_cast(DescriptorType::CBV); radixSortBindings[0].count = 1; radixSortBindings[0].visibility = static_cast(ShaderVisibility::All); for (uint32_t bindingIndex = 1; bindingIndex <= 6; ++bindingIndex) { radixSortBindings[bindingIndex].binding = bindingIndex; radixSortBindings[bindingIndex].type = static_cast(DescriptorType::UAV); radixSortBindings[bindingIndex].count = 1; radixSortBindings[bindingIndex].visibility = static_cast(ShaderVisibility::All); radixSortBindings[bindingIndex].resourceDimension = ResourceViewDimension::StructuredBuffer; } DescriptorSetLayoutDesc radixSortSetLayout = {}; radixSortSetLayout.bindings = radixSortBindings; radixSortSetLayout.bindingCount = 7; RHIPipelineLayoutDesc radixSortLayoutDesc = {}; radixSortLayoutDesc.setLayouts = &radixSortSetLayout; radixSortLayoutDesc.setLayoutCount = 1; m_radixSortPipelineLayout = m_device.CreatePipelineLayout(radixSortLayoutDesc); if (m_radixSortPipelineLayout == nullptr) { m_lastErrorMessage = L"Failed to create the radix-sort pipeline layout."; return false; } DescriptorPoolDesc radixSortPoolDesc = {}; radixSortPoolDesc.type = DescriptorHeapType::CBV_SRV_UAV; radixSortPoolDesc.descriptorCount = 12; radixSortPoolDesc.shaderVisible = true; m_radixSortDescriptorPool = m_device.CreateDescriptorPool(radixSortPoolDesc); if (m_radixSortDescriptorPool == nullptr) { m_lastErrorMessage = L"Failed to create the radix-sort descriptor pool."; return false; } m_radixSortDescriptorSetPrimaryToScratch = m_radixSortDescriptorPool->AllocateSet(radixSortSetLayout); m_radixSortDescriptorSetScratchToPrimary = m_radixSortDescriptorPool->AllocateSet(radixSortSetLayout); if (m_radixSortDescriptorSetPrimaryToScratch == nullptr || m_radixSortDescriptorSetScratchToPrimary == nullptr) { m_lastErrorMessage = L"Failed to allocate the radix-sort descriptor sets."; return false; } m_radixSortDescriptorSetPrimaryToScratch->Update(1, m_sortKeyUav.get()); m_radixSortDescriptorSetPrimaryToScratch->Update(2, m_sortKeyScratchUav.get()); m_radixSortDescriptorSetPrimaryToScratch->Update(3, m_orderBufferUav.get()); m_radixSortDescriptorSetPrimaryToScratch->Update(4, m_orderScratchUav.get()); m_radixSortDescriptorSetPrimaryToScratch->Update(5, m_passHistogramUav.get()); m_radixSortDescriptorSetPrimaryToScratch->Update(6, m_globalHistogramUav.get()); m_radixSortDescriptorSetScratchToPrimary->Update(1, m_sortKeyScratchUav.get()); m_radixSortDescriptorSetScratchToPrimary->Update(2, m_sortKeyUav.get()); m_radixSortDescriptorSetScratchToPrimary->Update(3, m_orderScratchUav.get()); m_radixSortDescriptorSetScratchToPrimary->Update(4, m_orderBufferUav.get()); m_radixSortDescriptorSetScratchToPrimary->Update(5, m_passHistogramUav.get()); m_radixSortDescriptorSetScratchToPrimary->Update(6, m_globalHistogramUav.get()); const ShaderCompileDesc radixInitShaderDesc = BuildDxilShaderDesc(ResolveShaderPath(L"RadixInit.dxil"), L"cs_6_6"); const ShaderCompileDesc radixUpsweepShaderDesc = BuildDxilShaderDesc(ResolveShaderPath(L"RadixUpsweep.dxil"), L"cs_6_6"); const ShaderCompileDesc radixGlobalHistogramShaderDesc = BuildDxilShaderDesc(ResolveShaderPath(L"RadixGlobalHistogram.dxil"), L"cs_6_6"); const ShaderCompileDesc radixScanShaderDesc = BuildDxilShaderDesc(ResolveShaderPath(L"RadixScan.dxil"), L"cs_6_6"); const ShaderCompileDesc radixDownsweepShaderDesc = BuildDxilShaderDesc(ResolveShaderPath(L"RadixDownsweep.dxil"), L"cs_6_6"); if (radixInitShaderDesc.compiledBinary.empty() || radixUpsweepShaderDesc.compiledBinary.empty() || radixGlobalHistogramShaderDesc.compiledBinary.empty() || radixScanShaderDesc.compiledBinary.empty() || radixDownsweepShaderDesc.compiledBinary.empty()) { m_lastErrorMessage = L"Failed to load one or more radix-sort DXIL shaders."; return false; } ComputePipelineDesc radixPipelineDesc = {}; radixPipelineDesc.pipelineLayout = m_radixSortPipelineLayout; radixPipelineDesc.computeShader = radixInitShaderDesc; m_radixSortInitPipelineState = m_device.CreateComputePipelineState(radixPipelineDesc); if (m_radixSortInitPipelineState == nullptr) { m_lastErrorMessage = L"Failed to create the radix-sort init pipeline state."; return false; } radixPipelineDesc.computeShader = radixUpsweepShaderDesc; m_radixSortUpsweepPipelineState = m_device.CreateComputePipelineState(radixPipelineDesc); if (m_radixSortUpsweepPipelineState == nullptr) { m_lastErrorMessage = L"Failed to create the radix-sort upsweep pipeline state."; return false; } radixPipelineDesc.computeShader = radixGlobalHistogramShaderDesc; m_radixSortGlobalHistogramPipelineState = m_device.CreateComputePipelineState(radixPipelineDesc); if (m_radixSortGlobalHistogramPipelineState == nullptr) { m_lastErrorMessage = L"Failed to create the radix-sort global-histogram pipeline state."; return false; } radixPipelineDesc.computeShader = radixScanShaderDesc; m_radixSortScanPipelineState = m_device.CreateComputePipelineState(radixPipelineDesc); if (m_radixSortScanPipelineState == nullptr) { m_lastErrorMessage = L"Failed to create the radix-sort scan pipeline state."; return false; } radixPipelineDesc.computeShader = radixDownsweepShaderDesc; m_radixSortDownsweepPipelineState = m_device.CreateComputePipelineState(radixPipelineDesc); if (m_radixSortDownsweepPipelineState == nullptr) { m_lastErrorMessage = L"Failed to create the radix-sort downsweep pipeline state."; return false; } return true; } bool App::InitializeDebugDrawResources() { DescriptorSetLayoutBinding bindings[3] = {}; bindings[0].binding = 0; bindings[0].type = static_cast(DescriptorType::CBV); bindings[0].count = 1; bindings[0].visibility = static_cast(ShaderVisibility::All); bindings[1].binding = 1; bindings[1].type = static_cast(DescriptorType::SRV); bindings[1].count = 1; bindings[1].visibility = static_cast(ShaderVisibility::All); bindings[1].resourceDimension = ResourceViewDimension::StructuredBuffer; bindings[2].binding = 2; bindings[2].type = static_cast(DescriptorType::SRV); bindings[2].count = 1; bindings[2].visibility = static_cast(ShaderVisibility::All); bindings[2].resourceDimension = ResourceViewDimension::StructuredBuffer; DescriptorSetLayoutDesc setLayout = {}; setLayout.bindings = bindings; setLayout.bindingCount = 3; RHIPipelineLayoutDesc pipelineLayoutDesc = {}; pipelineLayoutDesc.setLayouts = &setLayout; pipelineLayoutDesc.setLayoutCount = 1; m_debugPipelineLayout = m_device.CreatePipelineLayout(pipelineLayoutDesc); if (m_debugPipelineLayout == nullptr) { m_lastErrorMessage = L"Failed to create the debug draw pipeline layout."; return false; } DescriptorPoolDesc poolDesc = {}; poolDesc.type = DescriptorHeapType::CBV_SRV_UAV; poolDesc.descriptorCount = 2; poolDesc.shaderVisible = true; m_debugDescriptorPool = m_device.CreateDescriptorPool(poolDesc); if (m_debugDescriptorPool == nullptr) { m_lastErrorMessage = L"Failed to create the debug draw descriptor pool."; return false; } m_debugDescriptorSet = m_debugDescriptorPool->AllocateSet(setLayout); if (m_debugDescriptorSet == nullptr) { m_lastErrorMessage = L"Failed to allocate the debug draw descriptor set."; return false; } m_debugDescriptorSet->Update(1, m_preparedViewSrv.get()); m_debugDescriptorSet->Update(2, m_orderBufferSrv.get()); GraphicsPipelineDesc pipelineDesc = {}; pipelineDesc.pipelineLayout = m_debugPipelineLayout; pipelineDesc.topologyType = static_cast(PrimitiveTopologyType::Triangle); pipelineDesc.renderTargetCount = 1; pipelineDesc.renderTargetFormats[0] = static_cast(Format::R8G8B8A8_UNorm); pipelineDesc.depthStencilFormat = static_cast(Format::D24_UNorm_S8_UInt); pipelineDesc.sampleCount = 1; pipelineDesc.sampleQuality = 0; pipelineDesc.rasterizerState.cullMode = static_cast(CullMode::None); pipelineDesc.depthStencilState.depthTestEnable = false; pipelineDesc.depthStencilState.depthWriteEnable = false; pipelineDesc.blendState.blendEnable = true; pipelineDesc.blendState.srcBlend = static_cast(BlendFactor::One); pipelineDesc.blendState.dstBlend = static_cast(BlendFactor::InvSrcAlpha); pipelineDesc.blendState.srcBlendAlpha = static_cast(BlendFactor::One); pipelineDesc.blendState.dstBlendAlpha = static_cast(BlendFactor::InvSrcAlpha); pipelineDesc.vertexShader.fileName = ResolveShaderPath(L"DebugPointsVS.hlsl").wstring(); pipelineDesc.vertexShader.entryPoint = L"MainVS"; pipelineDesc.vertexShader.profile = L"vs_5_0"; pipelineDesc.fragmentShader.fileName = ResolveShaderPath(L"DebugPointsPS.hlsl").wstring(); pipelineDesc.fragmentShader.entryPoint = L"MainPS"; pipelineDesc.fragmentShader.profile = L"ps_5_0"; m_debugPipelineState = m_device.CreatePipelineState(pipelineDesc); if (m_debugPipelineState == nullptr) { m_lastErrorMessage = L"Failed to create the debug draw pipeline state."; return false; } return true; } void App::ShutdownGaussianGpuResources() { m_gaussianColorView.reset(); m_gaussianShView.reset(); m_gaussianOtherView.reset(); m_gaussianPositionView.reset(); m_gaussianUploadBuffers.clear(); m_gaussianColorTexture.Shutdown(); m_gaussianShBuffer.Shutdown(); m_gaussianOtherBuffer.Shutdown(); m_gaussianPositionBuffer.Shutdown(); } void App::ShutdownPreparePassResources() { if (m_prepareDescriptorSet != nullptr) { m_prepareDescriptorSet->Shutdown(); delete m_prepareDescriptorSet; m_prepareDescriptorSet = nullptr; } ShutdownAndDelete(m_prepareDescriptorPool); ShutdownAndDelete(m_preparePipelineState); ShutdownAndDelete(m_preparePipelineLayout); m_preparedViewUav.reset(); m_preparedViewSrv.reset(); if (m_preparedViewBuffer != nullptr) { m_preparedViewBuffer->Shutdown(); delete m_preparedViewBuffer; m_preparedViewBuffer = nullptr; } } void App::ShutdownSortResources() { if (m_buildSortKeyDescriptorSet != nullptr) { m_buildSortKeyDescriptorSet->Shutdown(); delete m_buildSortKeyDescriptorSet; m_buildSortKeyDescriptorSet = nullptr; } ShutdownAndDelete(m_buildSortKeyDescriptorPool); ShutdownAndDelete(m_buildSortKeyPipelineState); ShutdownAndDelete(m_buildSortKeyPipelineLayout); if (m_radixSortDescriptorSetPrimaryToScratch != nullptr) { m_radixSortDescriptorSetPrimaryToScratch->Shutdown(); delete m_radixSortDescriptorSetPrimaryToScratch; m_radixSortDescriptorSetPrimaryToScratch = nullptr; } if (m_radixSortDescriptorSetScratchToPrimary != nullptr) { m_radixSortDescriptorSetScratchToPrimary->Shutdown(); delete m_radixSortDescriptorSetScratchToPrimary; m_radixSortDescriptorSetScratchToPrimary = nullptr; } ShutdownAndDelete(m_radixSortDescriptorPool); ShutdownAndDelete(m_radixSortDownsweepPipelineState); ShutdownAndDelete(m_radixSortScanPipelineState); ShutdownAndDelete(m_radixSortGlobalHistogramPipelineState); ShutdownAndDelete(m_radixSortUpsweepPipelineState); ShutdownAndDelete(m_radixSortInitPipelineState); ShutdownAndDelete(m_radixSortPipelineLayout); m_globalHistogramUav.reset(); m_passHistogramUav.reset(); m_orderScratchUav.reset(); m_orderBufferUav.reset(); m_orderBufferSrv.reset(); m_sortKeyScratchUav.reset(); m_sortKeyUav.reset(); if (m_orderBuffer != nullptr) { m_orderBuffer->Shutdown(); delete m_orderBuffer; m_orderBuffer = nullptr; } if (m_orderScratchBuffer != nullptr) { m_orderScratchBuffer->Shutdown(); delete m_orderScratchBuffer; m_orderScratchBuffer = nullptr; } if (m_sortKeyBuffer != nullptr) { m_sortKeyBuffer->Shutdown(); delete m_sortKeyBuffer; m_sortKeyBuffer = nullptr; } if (m_sortKeyScratchBuffer != nullptr) { m_sortKeyScratchBuffer->Shutdown(); delete m_sortKeyScratchBuffer; m_sortKeyScratchBuffer = nullptr; } if (m_passHistogramBuffer != nullptr) { m_passHistogramBuffer->Shutdown(); delete m_passHistogramBuffer; m_passHistogramBuffer = nullptr; } if (m_globalHistogramBuffer != nullptr) { m_globalHistogramBuffer->Shutdown(); delete m_globalHistogramBuffer; m_globalHistogramBuffer = nullptr; } } void App::ShutdownDebugDrawResources() { if (m_debugDescriptorSet != nullptr) { m_debugDescriptorSet->Shutdown(); delete m_debugDescriptorSet; m_debugDescriptorSet = nullptr; } ShutdownAndDelete(m_debugDescriptorPool); ShutdownAndDelete(m_debugPipelineState); ShutdownAndDelete(m_debugPipelineLayout); } void App::Shutdown() { AppendTrace("Shutdown: begin"); if (!m_isInitialized && m_hwnd == nullptr) { AppendTrace("Shutdown: skipped"); return; } m_running = false; if (m_commandQueue.GetCommandQueue() != nullptr) { AppendTrace("Shutdown: WaitForIdle"); m_commandQueue.WaitForIdle(); } ShutdownDebugDrawResources(); ShutdownSortResources(); ShutdownPreparePassResources(); ShutdownGaussianGpuResources(); m_commandList.Shutdown(); m_commandAllocator.Shutdown(); m_dsv.Shutdown(); for (D3D12ResourceView& rtv : m_rtvs) { rtv.Shutdown(); } m_dsvHeap.Shutdown(); m_rtvHeap.Shutdown(); m_depthStencil.Shutdown(); m_swapChain.Shutdown(); m_commandQueue.Shutdown(); m_device.Shutdown(); if (m_hwnd != nullptr) { AppendTrace("Shutdown: DestroyWindow"); DestroyWindow(m_hwnd); m_hwnd = nullptr; } if (m_instance != nullptr) { UnregisterClassW(kWindowClassName, m_instance); m_instance = nullptr; } m_isInitialized = false; AppendTrace("Shutdown: end"); } bool App::CaptureSortSnapshot() { if (m_sortKeyBuffer == nullptr || m_orderBuffer == nullptr || m_gaussianSceneData.splatCount == 0 || m_sortKeySnapshotPath.empty()) { return true; } const uint32_t sampleCount = std::min(16u, m_gaussianSceneData.splatCount); const uint64_t keyBufferBytes = static_cast(m_gaussianSceneData.splatCount) * sizeof(uint32_t); const uint64_t sampleBytes = static_cast(sampleCount) * sizeof(uint32_t); D3D12Buffer sortKeyReadbackBuffer; if (!sortKeyReadbackBuffer.Initialize( m_device.GetDevice(), keyBufferBytes, D3D12_RESOURCE_STATE_COPY_DEST, D3D12_HEAP_TYPE_READBACK, D3D12_RESOURCE_FLAG_NONE)) { m_lastErrorMessage = L"Failed to create the sort key readback buffer."; return false; } D3D12Buffer orderReadbackBuffer; if (!orderReadbackBuffer.Initialize( m_device.GetDevice(), sampleBytes, D3D12_RESOURCE_STATE_COPY_DEST, D3D12_HEAP_TYPE_READBACK, D3D12_RESOURCE_FLAG_NONE)) { m_lastErrorMessage = L"Failed to create the order readback buffer."; sortKeyReadbackBuffer.Shutdown(); return false; } m_commandAllocator.Reset(); m_commandList.Reset(); if (m_sortKeyBuffer->GetState() != ResourceStates::CopySrc) { m_commandList.TransitionBarrier( m_sortKeyBuffer->GetResource(), m_sortKeyBuffer->GetState(), ResourceStates::CopySrc); m_sortKeyBuffer->SetState(ResourceStates::CopySrc); } if (m_orderBuffer->GetState() != ResourceStates::CopySrc) { m_commandList.TransitionBarrier( m_orderBuffer->GetResource(), m_orderBuffer->GetState(), ResourceStates::CopySrc); m_orderBuffer->SetState(ResourceStates::CopySrc); } m_commandList.GetCommandList()->CopyBufferRegion( sortKeyReadbackBuffer.GetResource(), 0, m_sortKeyBuffer->GetResource(), 0, keyBufferBytes); m_commandList.GetCommandList()->CopyBufferRegion( orderReadbackBuffer.GetResource(), 0, m_orderBuffer->GetResource(), 0, sampleBytes); m_commandList.Close(); void* commandLists[] = { &m_commandList }; m_commandQueue.ExecuteCommandLists(1, commandLists); m_commandQueue.WaitForIdle(); const uint32_t* keys = static_cast(sortKeyReadbackBuffer.Map()); if (keys == nullptr) { m_lastErrorMessage = L"Failed to map the sort key readback buffer."; sortKeyReadbackBuffer.Shutdown(); orderReadbackBuffer.Shutdown(); return false; } const uint32_t* order = static_cast(orderReadbackBuffer.Map()); if (order == nullptr) { sortKeyReadbackBuffer.Unmap(); sortKeyReadbackBuffer.Shutdown(); orderReadbackBuffer.Shutdown(); m_lastErrorMessage = L"Failed to map the order readback buffer."; return false; } const std::filesystem::path snapshotPath = ResolveNearExecutable(m_sortKeySnapshotPath); if (!snapshotPath.parent_path().empty()) { std::filesystem::create_directories(snapshotPath.parent_path()); } std::ofstream output(snapshotPath, std::ios::binary | std::ios::trunc); if (!output.is_open()) { orderReadbackBuffer.Unmap(); sortKeyReadbackBuffer.Unmap(); orderReadbackBuffer.Shutdown(); sortKeyReadbackBuffer.Shutdown(); m_lastErrorMessage = L"Failed to open the sort key snapshot output file."; return false; } output << "sample_count=" << sampleCount << '\n'; bool isSorted = true; uint32_t firstInversionIndex = 0u; uint32_t firstInversionPrevious = 0u; uint32_t firstInversionCurrent = 0u; for (uint32_t index = 1; index < m_gaussianSceneData.splatCount; ++index) { if (keys[index - 1u] > keys[index]) { isSorted = false; firstInversionIndex = index; firstInversionPrevious = keys[index - 1u]; firstInversionCurrent = keys[index]; break; } } std::vector cpuReferenceOrder; std::vector cpuReferenceKeys; BuildCpuSortedOrder(m_gaussianSceneData, cpuReferenceOrder, &cpuReferenceKeys); uint32_t firstCpuMismatchIndex = 0u; uint32_t firstCpuMismatchGpu = 0u; uint32_t firstCpuMismatchCpu = 0u; bool matchesCpuReference = true; for (uint32_t index = 0; index < m_gaussianSceneData.splatCount; ++index) { if (keys[index] != cpuReferenceKeys[index]) { matchesCpuReference = false; firstCpuMismatchIndex = index; firstCpuMismatchGpu = keys[index]; firstCpuMismatchCpu = cpuReferenceKeys[index]; break; } } output << "sorted=" << (isSorted ? 1 : 0) << '\n'; output << "first_inversion_index=" << firstInversionIndex << '\n'; output << "first_inversion_prev=" << firstInversionPrevious << '\n'; output << "first_inversion_curr=" << firstInversionCurrent << '\n'; output << "matches_cpu_reference=" << (matchesCpuReference ? 1 : 0) << '\n'; output << "first_cpu_mismatch_index=" << firstCpuMismatchIndex << '\n'; output << "first_cpu_mismatch_gpu=" << firstCpuMismatchGpu << '\n'; output << "first_cpu_mismatch_cpu=" << firstCpuMismatchCpu << '\n'; for (uint32_t index = 0; index < sampleCount; ++index) { output << "key[" << index << "]=" << keys[index] << '\n'; output << "order[" << index << "]=" << order[index] << '\n'; output << "cpu_order[" << index << "]=" << cpuReferenceOrder[index] << '\n'; output << "cpu_key[" << index << "]=" << cpuReferenceKeys[index] << '\n'; } orderReadbackBuffer.Unmap(); sortKeyReadbackBuffer.Unmap(); orderReadbackBuffer.Shutdown(); sortKeyReadbackBuffer.Shutdown(); return output.good(); } bool App::CapturePass3HistogramDebug() { if (m_sortKeyScratchBuffer == nullptr || m_sortKeyBuffer == nullptr || m_globalHistogramBuffer == nullptr || m_gaussianSceneData.splatCount == 0) { return true; } const uint64_t keyBufferBytes = static_cast(m_gaussianSceneData.splatCount) * sizeof(uint32_t); const uint32_t histogramElementCount = kDeviceRadixSortRadix * kDeviceRadixSortPassCount; const uint64_t histogramBytes = static_cast(histogramElementCount) * sizeof(uint32_t); D3D12Buffer keyReadbackBuffer; if (!keyReadbackBuffer.Initialize( m_device.GetDevice(), keyBufferBytes, D3D12_RESOURCE_STATE_COPY_DEST, D3D12_HEAP_TYPE_READBACK, D3D12_RESOURCE_FLAG_NONE)) { m_lastErrorMessage = L"Failed to create the pass3 histogram key readback buffer."; return false; } D3D12Buffer histogramReadbackBuffer; if (!histogramReadbackBuffer.Initialize( m_device.GetDevice(), histogramBytes, D3D12_RESOURCE_STATE_COPY_DEST, D3D12_HEAP_TYPE_READBACK, D3D12_RESOURCE_FLAG_NONE)) { keyReadbackBuffer.Shutdown(); m_lastErrorMessage = L"Failed to create the pass3 histogram readback buffer."; return false; } D3D12Buffer primaryKeyReadbackBuffer; if (!primaryKeyReadbackBuffer.Initialize( m_device.GetDevice(), keyBufferBytes, D3D12_RESOURCE_STATE_COPY_DEST, D3D12_HEAP_TYPE_READBACK, D3D12_RESOURCE_FLAG_NONE)) { histogramReadbackBuffer.Shutdown(); keyReadbackBuffer.Shutdown(); m_lastErrorMessage = L"Failed to create the pass3 primary-key readback buffer."; return false; } m_commandAllocator.Reset(); m_commandList.Reset(); if (m_sortKeyScratchBuffer->GetState() != ResourceStates::CopySrc) { m_commandList.TransitionBarrier( m_sortKeyScratchBuffer->GetResource(), m_sortKeyScratchBuffer->GetState(), ResourceStates::CopySrc); m_sortKeyScratchBuffer->SetState(ResourceStates::CopySrc); } if (m_sortKeyBuffer->GetState() != ResourceStates::CopySrc) { m_commandList.TransitionBarrier( m_sortKeyBuffer->GetResource(), m_sortKeyBuffer->GetState(), ResourceStates::CopySrc); m_sortKeyBuffer->SetState(ResourceStates::CopySrc); } if (m_globalHistogramBuffer->GetState() != ResourceStates::CopySrc) { m_commandList.TransitionBarrier( m_globalHistogramBuffer->GetResource(), m_globalHistogramBuffer->GetState(), ResourceStates::CopySrc); m_globalHistogramBuffer->SetState(ResourceStates::CopySrc); } m_commandList.GetCommandList()->CopyBufferRegion( keyReadbackBuffer.GetResource(), 0, m_sortKeyScratchBuffer->GetResource(), 0, keyBufferBytes); m_commandList.GetCommandList()->CopyBufferRegion( primaryKeyReadbackBuffer.GetResource(), 0, m_sortKeyBuffer->GetResource(), 0, keyBufferBytes); m_commandList.GetCommandList()->CopyBufferRegion( histogramReadbackBuffer.GetResource(), 0, m_globalHistogramBuffer->GetResource(), 0, histogramBytes); m_commandList.Close(); void* commandLists[] = { &m_commandList }; m_commandQueue.ExecuteCommandLists(1, commandLists); m_commandQueue.WaitForIdle(); const uint32_t* keys = static_cast(keyReadbackBuffer.Map()); if (keys == nullptr) { primaryKeyReadbackBuffer.Shutdown(); histogramReadbackBuffer.Shutdown(); keyReadbackBuffer.Shutdown(); m_lastErrorMessage = L"Failed to map the pass3 key readback buffer."; return false; } const uint32_t* primaryKeys = static_cast(primaryKeyReadbackBuffer.Map()); if (primaryKeys == nullptr) { keyReadbackBuffer.Unmap(); primaryKeyReadbackBuffer.Shutdown(); histogramReadbackBuffer.Shutdown(); keyReadbackBuffer.Shutdown(); m_lastErrorMessage = L"Failed to map the pass3 primary-key readback buffer."; return false; } const uint32_t* histogram = static_cast(histogramReadbackBuffer.Map()); if (histogram == nullptr) { primaryKeyReadbackBuffer.Unmap(); keyReadbackBuffer.Unmap(); primaryKeyReadbackBuffer.Shutdown(); histogramReadbackBuffer.Shutdown(); keyReadbackBuffer.Shutdown(); m_lastErrorMessage = L"Failed to map the pass3 histogram readback buffer."; return false; } std::vector cpuCounts(kDeviceRadixSortRadix, 0u); std::vector primaryCpuCounts(kDeviceRadixSortRadix, 0u); for (uint32_t index = 0; index < m_gaussianSceneData.splatCount; ++index) { const uint32_t bin = (keys[index] >> 24u) & 0xffu; ++cpuCounts[bin]; const uint32_t primaryBin = (primaryKeys[index] >> 24u) & 0xffu; ++primaryCpuCounts[primaryBin]; } std::vector cpuExclusiveOffsets(kDeviceRadixSortRadix, 0u); std::vector primaryCpuExclusiveOffsets(kDeviceRadixSortRadix, 0u); uint32_t runningOffset = 0u; uint32_t primaryRunningOffset = 0u; for (uint32_t bin = 0; bin < kDeviceRadixSortRadix; ++bin) { cpuExclusiveOffsets[bin] = runningOffset; runningOffset += cpuCounts[bin]; primaryCpuExclusiveOffsets[bin] = primaryRunningOffset; primaryRunningOffset += primaryCpuCounts[bin]; } const std::filesystem::path debugPath = ResolveNearExecutable(L"phase3_hist_debug.txt"); if (!debugPath.parent_path().empty()) { std::filesystem::create_directories(debugPath.parent_path()); } std::ofstream output(debugPath, std::ios::binary | std::ios::trunc); if (!output.is_open()) { histogramReadbackBuffer.Unmap(); keyReadbackBuffer.Unmap(); histogramReadbackBuffer.Shutdown(); keyReadbackBuffer.Shutdown(); m_lastErrorMessage = L"Failed to open the pass3 histogram debug output file."; return false; } uint32_t mismatchCount = 0u; uint32_t primaryMismatchCount = 0u; constexpr uint32_t kPass3GlobalHistogramBase = kDeviceRadixSortRadix * 3u; output << "splat_count=" << m_gaussianSceneData.splatCount << '\n'; output << "final_offset=" << runningOffset << '\n'; output << "primary_final_offset=" << primaryRunningOffset << '\n'; for (uint32_t bin = 0; bin < kDeviceRadixSortRadix; ++bin) { const uint32_t gpuOffset = histogram[kPass3GlobalHistogramBase + bin]; const uint32_t cpuOffset = cpuExclusiveOffsets[bin]; const uint32_t primaryCpuOffset = primaryCpuExclusiveOffsets[bin]; if (gpuOffset != cpuOffset) { ++mismatchCount; } if (gpuOffset != primaryCpuOffset) { ++primaryMismatchCount; } output << "bin[" << bin << "].count=" << cpuCounts[bin] << " cpu_offset=" << cpuOffset << " primary_count=" << primaryCpuCounts[bin] << " primary_cpu_offset=" << primaryCpuOffset << " gpu_offset=" << gpuOffset << '\n'; } output << "mismatch_count=" << mismatchCount << '\n'; output << "primary_mismatch_count=" << primaryMismatchCount << '\n'; histogramReadbackBuffer.Unmap(); primaryKeyReadbackBuffer.Unmap(); keyReadbackBuffer.Unmap(); primaryKeyReadbackBuffer.Shutdown(); histogramReadbackBuffer.Shutdown(); keyReadbackBuffer.Shutdown(); return output.good(); } void App::RenderFrame(bool captureScreenshot) { AppendTrace(captureScreenshot ? "RenderFrame: begin capture" : "RenderFrame: begin"); if (m_hasRenderedAtLeastOneFrame) { AppendTrace("RenderFrame: WaitForPreviousFrame"); m_commandQueue.WaitForPreviousFrame(); } m_commandAllocator.Reset(); m_commandList.Reset(); const int currentBackBufferIndex = m_swapChain.GetCurrentBackBufferIndex(); D3D12Texture& backBuffer = m_swapChain.GetBackBuffer(currentBackBufferIndex); m_commandList.TransitionBarrier(backBuffer.GetResource(), ResourceStates::Present, ResourceStates::RenderTarget); const CPUDescriptorHandle rtvCpuHandle = m_rtvHeap.GetCPUDescriptorHandle(currentBackBufferIndex); const CPUDescriptorHandle dsvCpuHandle = m_dsvHeap.GetCPUDescriptorHandle(0); const D3D12_CPU_DESCRIPTOR_HANDLE rtvHandle = { rtvCpuHandle.ptr }; const D3D12_CPU_DESCRIPTOR_HANDLE dsvHandle = { dsvCpuHandle.ptr }; m_commandList.SetRenderTargetsHandle(1, &rtvHandle, &dsvHandle); const Viewport viewport = { 0.0f, 0.0f, static_cast(m_width), static_cast(m_height), 0.0f, 1.0f }; const Rect scissorRect = { 0, 0, m_width, m_height }; m_commandList.SetViewport(viewport); m_commandList.SetScissorRect(scissorRect); m_commandList.ClearRenderTargetView(rtvHandle, kClearColor, 0, nullptr); m_commandList.ClearDepthStencilView( dsvHandle, D3D12_CLEAR_FLAG_DEPTH | D3D12_CLEAR_FLAG_STENCIL, 1.0f, 0, 0, nullptr); const FrameConstants frameConstants = BuildFrameConstants( static_cast(m_width), static_cast(m_height), m_gaussianSceneData.splatCount); const uint32_t threadBlocks = static_cast((m_gaussianSceneData.splatCount + (kDeviceRadixSortPartitionSize - 1u)) / kDeviceRadixSortPartitionSize); const uint32_t passHistogramElementCount = threadBlocks * kDeviceRadixSortRadix; constexpr uint32_t kSortDebugStageCount = 5u; const char* const sortDebugStageNames[kSortDebugStageCount] = { "build_sort_keys", "radix_pass_0", "radix_pass_1", "radix_pass_2", "radix_pass_3", }; const uint32_t sortDebugSampleCount = 0u; const uint64_t sortDebugSampleBytes = static_cast(sortDebugSampleCount) * sizeof(uint32_t); D3D12Buffer sortDebugKeyReadbackBuffer; D3D12Buffer sortDebugOrderReadbackBuffer; const bool sortDebugEnabled = sortDebugSampleCount > 0 && sortDebugKeyReadbackBuffer.Initialize( m_device.GetDevice(), sortDebugSampleBytes * kSortDebugStageCount, D3D12_RESOURCE_STATE_COPY_DEST, D3D12_HEAP_TYPE_READBACK, D3D12_RESOURCE_FLAG_NONE) && sortDebugOrderReadbackBuffer.Initialize( m_device.GetDevice(), sortDebugSampleBytes * kSortDebugStageCount, D3D12_RESOURCE_STATE_COPY_DEST, D3D12_HEAP_TYPE_READBACK, D3D12_RESOURCE_FLAG_NONE); D3D12Buffer pass3PassHistogramReadbackBuffer; D3D12Buffer pass3PreScanGlobalHistogramReadbackBuffer; const uint64_t pass3PassHistogramReadbackBytes = static_cast(passHistogramElementCount) * sizeof(uint32_t); const uint64_t pass3PreScanGlobalHistogramReadbackBytes = static_cast(kDeviceRadixSortRadix * kDeviceRadixSortPassCount) * sizeof(uint32_t); const bool pass3PassHistogramDebugEnabled = captureScreenshot && pass3PassHistogramReadbackBuffer.Initialize( m_device.GetDevice(), pass3PassHistogramReadbackBytes, D3D12_RESOURCE_STATE_COPY_DEST, D3D12_HEAP_TYPE_READBACK, D3D12_RESOURCE_FLAG_NONE); const bool pass3PreScanGlobalHistogramDebugEnabled = captureScreenshot && pass3PreScanGlobalHistogramReadbackBuffer.Initialize( m_device.GetDevice(), pass3PreScanGlobalHistogramReadbackBytes, D3D12_RESOURCE_STATE_COPY_DEST, D3D12_HEAP_TYPE_READBACK, D3D12_RESOURCE_FLAG_NONE); bool pass3PassHistogramCaptured = false; bool pass3PreScanGlobalHistogramCaptured = false; auto transitionBuffer = [this](D3D12Buffer* buffer, ResourceStates newState) { if (buffer != nullptr && buffer->GetState() != newState) { m_commandList.TransitionBarrier( buffer->GetResource(), buffer->GetState(), newState); buffer->SetState(newState); } }; auto recordSortDebugStage = [&](uint32_t stageIndex, D3D12Buffer* keyBuffer, D3D12Buffer* orderBuffer, ResourceStates keyRestoreState, ResourceStates orderRestoreState) { if (!sortDebugEnabled || keyBuffer == nullptr || orderBuffer == nullptr || sortDebugSampleBytes == 0 || stageIndex >= kSortDebugStageCount) { return; } transitionBuffer(keyBuffer, ResourceStates::CopySrc); transitionBuffer(orderBuffer, ResourceStates::CopySrc); m_commandList.GetCommandList()->CopyBufferRegion( sortDebugKeyReadbackBuffer.GetResource(), static_cast(stageIndex) * sortDebugSampleBytes, keyBuffer->GetResource(), 0, sortDebugSampleBytes); m_commandList.GetCommandList()->CopyBufferRegion( sortDebugOrderReadbackBuffer.GetResource(), static_cast(stageIndex) * sortDebugSampleBytes, orderBuffer->GetResource(), 0, sortDebugSampleBytes); transitionBuffer(keyBuffer, keyRestoreState); transitionBuffer(orderBuffer, orderRestoreState); }; m_prepareDescriptorSet->WriteConstant(0, &frameConstants, sizeof(frameConstants)); m_debugDescriptorSet->WriteConstant(0, &frameConstants, sizeof(frameConstants)); transitionBuffer(m_preparedViewBuffer, ResourceStates::UnorderedAccess); m_commandList.SetPipelineState(m_preparePipelineState); RHIDescriptorSet* prepareSets[] = { m_prepareDescriptorSet }; m_commandList.SetComputeDescriptorSets(0, 1, prepareSets, m_preparePipelineLayout); m_commandList.Dispatch((m_gaussianSceneData.splatCount + (kPrepareThreadGroupSize - 1)) / kPrepareThreadGroupSize, 1, 1); m_commandList.UAVBarrier(m_preparedViewBuffer->GetResource()); transitionBuffer(m_orderBuffer, ResourceStates::NonPixelShaderResource); transitionBuffer(m_sortKeyBuffer, ResourceStates::UnorderedAccess); m_buildSortKeyDescriptorSet->WriteConstant(0, &frameConstants, sizeof(frameConstants)); m_commandList.SetPipelineState(m_buildSortKeyPipelineState); RHIDescriptorSet* buildSortKeySets[] = { m_buildSortKeyDescriptorSet }; m_commandList.SetComputeDescriptorSets(0, 1, buildSortKeySets, m_buildSortKeyPipelineLayout); m_commandList.Dispatch((m_gaussianSceneData.splatCount + (kSortThreadGroupSize - 1)) / kSortThreadGroupSize, 1, 1); m_commandList.UAVBarrier(m_sortKeyBuffer->GetResource()); recordSortDebugStage( 0u, m_sortKeyBuffer, m_orderBuffer, ResourceStates::UnorderedAccess, ResourceStates::NonPixelShaderResource); if (!kUseCpuSortBaseline) { transitionBuffer(m_orderBuffer, ResourceStates::UnorderedAccess); transitionBuffer(m_orderScratchBuffer, ResourceStates::UnorderedAccess); transitionBuffer(m_sortKeyScratchBuffer, ResourceStates::UnorderedAccess); transitionBuffer(m_passHistogramBuffer, ResourceStates::UnorderedAccess); transitionBuffer(m_globalHistogramBuffer, ResourceStates::UnorderedAccess); RadixSortConstants radixSortConstants = {}; radixSortConstants.numKeys = m_gaussianSceneData.splatCount; radixSortConstants.threadBlocks = threadBlocks; m_radixSortDescriptorSetPrimaryToScratch->WriteConstant(0, &radixSortConstants, sizeof(radixSortConstants)); m_commandList.SetPipelineState(m_radixSortInitPipelineState); RHIDescriptorSet* radixInitSets[] = { m_radixSortDescriptorSetPrimaryToScratch }; m_commandList.SetComputeDescriptorSets(0, 1, radixInitSets, m_radixSortPipelineLayout); m_commandList.Dispatch(1, 1, 1); m_commandList.UAVBarrier(m_globalHistogramBuffer->GetResource()); for (uint32_t passIndex = 0; passIndex < kDeviceRadixSortPassCount; ++passIndex) { radixSortConstants.radixShift = passIndex * 8u; RHIDescriptorSet* activeRadixSet = (passIndex & 1u) == 0u ? m_radixSortDescriptorSetPrimaryToScratch : m_radixSortDescriptorSetScratchToPrimary; D3D12Buffer* destinationKeyBuffer = (passIndex & 1u) == 0u ? m_sortKeyScratchBuffer : m_sortKeyBuffer; D3D12Buffer* destinationOrderBuffer = (passIndex & 1u) == 0u ? m_orderScratchBuffer : m_orderBuffer; activeRadixSet->WriteConstant(0, &radixSortConstants, sizeof(radixSortConstants)); m_commandList.SetPipelineState(m_radixSortUpsweepPipelineState); m_commandList.SetComputeDescriptorSets(0, 1, &activeRadixSet, m_radixSortPipelineLayout); m_commandList.Dispatch(threadBlocks, 1, 1); m_commandList.UAVBarrier(m_passHistogramBuffer->GetResource()); if (pass3PassHistogramDebugEnabled && passIndex == 3u && !pass3PassHistogramCaptured) { transitionBuffer(m_passHistogramBuffer, ResourceStates::CopySrc); m_commandList.GetCommandList()->CopyBufferRegion( pass3PassHistogramReadbackBuffer.GetResource(), 0, m_passHistogramBuffer->GetResource(), 0, pass3PassHistogramReadbackBytes); transitionBuffer(m_passHistogramBuffer, ResourceStates::UnorderedAccess); pass3PassHistogramCaptured = true; } m_commandList.SetPipelineState(m_radixSortGlobalHistogramPipelineState); m_commandList.SetComputeDescriptorSets(0, 1, &activeRadixSet, m_radixSortPipelineLayout); m_commandList.Dispatch(1, 1, 1); m_commandList.UAVBarrier(m_globalHistogramBuffer->GetResource()); if (pass3PreScanGlobalHistogramDebugEnabled && passIndex == 3u && !pass3PreScanGlobalHistogramCaptured) { transitionBuffer(m_globalHistogramBuffer, ResourceStates::CopySrc); m_commandList.GetCommandList()->CopyBufferRegion( pass3PreScanGlobalHistogramReadbackBuffer.GetResource(), 0, m_globalHistogramBuffer->GetResource(), 0, pass3PreScanGlobalHistogramReadbackBytes); transitionBuffer(m_globalHistogramBuffer, ResourceStates::UnorderedAccess); pass3PreScanGlobalHistogramCaptured = true; } m_commandList.SetPipelineState(m_radixSortScanPipelineState); m_commandList.SetComputeDescriptorSets(0, 1, &activeRadixSet, m_radixSortPipelineLayout); m_commandList.Dispatch(kDeviceRadixSortRadix, 1, 1); m_commandList.UAVBarrier(m_passHistogramBuffer->GetResource()); m_commandList.SetPipelineState(m_radixSortDownsweepPipelineState); m_commandList.SetComputeDescriptorSets(0, 1, &activeRadixSet, m_radixSortPipelineLayout); m_commandList.Dispatch(threadBlocks, 1, 1); m_commandList.UAVBarrier(destinationKeyBuffer->GetResource()); m_commandList.UAVBarrier(destinationOrderBuffer->GetResource()); recordSortDebugStage( passIndex + 1u, destinationKeyBuffer, destinationOrderBuffer, ResourceStates::UnorderedAccess, ResourceStates::UnorderedAccess); } } transitionBuffer(m_preparedViewBuffer, ResourceStates::NonPixelShaderResource); transitionBuffer(m_orderBuffer, ResourceStates::NonPixelShaderResource); m_commandList.SetPipelineState(m_debugPipelineState); RHIDescriptorSet* debugSets[] = { m_debugDescriptorSet }; m_commandList.SetGraphicsDescriptorSets(0, 1, debugSets, m_debugPipelineLayout); m_commandList.SetPrimitiveTopology(PrimitiveTopology::TriangleStrip); m_commandList.Draw(4u, m_gaussianSceneData.splatCount, 0u, 0u); if (captureScreenshot) { AppendTrace("RenderFrame: close+execute capture pre-screenshot"); m_commandList.Close(); void* commandLists[] = { &m_commandList }; m_commandQueue.ExecuteCommandLists(1, commandLists); AppendTrace("RenderFrame: WaitForIdle before screenshot"); m_commandQueue.WaitForIdle(); if (sortDebugEnabled) { const std::filesystem::path debugPath = ResolveNearExecutable(L"phase3_sort_debug.txt"); std::ofstream debugOutput(debugPath, std::ios::binary | std::ios::trunc); const uint32_t* stageKeys = static_cast(sortDebugKeyReadbackBuffer.Map()); const uint32_t* stageOrder = static_cast(sortDebugOrderReadbackBuffer.Map()); if (debugOutput.is_open() && stageKeys != nullptr && stageOrder != nullptr) { debugOutput << "sample_count=" << sortDebugSampleCount << '\n'; for (uint32_t stageIndex = 0; stageIndex < kSortDebugStageCount; ++stageIndex) { debugOutput << "stage=" << sortDebugStageNames[stageIndex] << '\n'; const uint32_t* stageKeyBase = stageKeys + stageIndex * sortDebugSampleCount; const uint32_t* stageOrderBase = stageOrder + stageIndex * sortDebugSampleCount; for (uint32_t sampleIndex = 0; sampleIndex < sortDebugSampleCount; ++sampleIndex) { debugOutput << "key[" << sampleIndex << "]=" << stageKeyBase[sampleIndex] << '\n'; debugOutput << "order[" << sampleIndex << "]=" << stageOrderBase[sampleIndex] << '\n'; } } } if (stageOrder != nullptr) { sortDebugOrderReadbackBuffer.Unmap(); } if (stageKeys != nullptr) { sortDebugKeyReadbackBuffer.Unmap(); } } if (pass3PassHistogramDebugEnabled && pass3PassHistogramCaptured) { const uint32_t* passHistogram = static_cast(pass3PassHistogramReadbackBuffer.Map()); if (passHistogram != nullptr) { const std::filesystem::path passHistogramDebugPath = ResolveNearExecutable(L"phase3_passhist_debug.txt"); if (!passHistogramDebugPath.parent_path().empty()) { std::filesystem::create_directories(passHistogramDebugPath.parent_path()); } std::ofstream passHistogramOutput( passHistogramDebugPath, std::ios::binary | std::ios::trunc); if (passHistogramOutput.is_open()) { uint64_t totalCount = 0u; passHistogramOutput << "thread_blocks=" << threadBlocks << '\n'; for (uint32_t bin = 0; bin < kDeviceRadixSortRadix; ++bin) { uint64_t binTotal = 0u; for (uint32_t blockIndex = 0; blockIndex < threadBlocks; ++blockIndex) { binTotal += passHistogram[bin * threadBlocks + blockIndex]; } totalCount += binTotal; passHistogramOutput << "bin[" << bin << "]=" << binTotal << '\n'; } passHistogramOutput << "total_count=" << totalCount << '\n'; } pass3PassHistogramReadbackBuffer.Unmap(); } } if (pass3PreScanGlobalHistogramDebugEnabled && pass3PreScanGlobalHistogramCaptured) { const uint32_t* preScanGlobalHistogram = static_cast(pass3PreScanGlobalHistogramReadbackBuffer.Map()); if (preScanGlobalHistogram != nullptr) { const std::filesystem::path preScanGlobalHistogramPath = ResolveNearExecutable(L"phase3_pre_scan_globalhist.txt"); if (!preScanGlobalHistogramPath.parent_path().empty()) { std::filesystem::create_directories(preScanGlobalHistogramPath.parent_path()); } std::ofstream preScanGlobalHistogramOutput( preScanGlobalHistogramPath, std::ios::binary | std::ios::trunc); if (preScanGlobalHistogramOutput.is_open()) { constexpr uint32_t kPass3GlobalHistogramBase = kDeviceRadixSortRadix * 3u; for (uint32_t bin = 0; bin < kDeviceRadixSortRadix; ++bin) { preScanGlobalHistogramOutput << "bin[" << bin << "]=" << preScanGlobalHistogram[kPass3GlobalHistogramBase + bin] << '\n'; } } pass3PreScanGlobalHistogramReadbackBuffer.Unmap(); } } AppendTrace("RenderFrame: Capture sort snapshot"); if (!CaptureSortSnapshot()) { AppendTrace(std::string("RenderFrame: Capture sort snapshot failed: ") + NarrowAscii(m_lastErrorMessage)); } if (!kUseCpuSortBaseline) { AppendTrace("RenderFrame: Capture pass3 histogram debug"); if (!CapturePass3HistogramDebug()) { AppendTrace(std::string("RenderFrame: Capture pass3 histogram debug failed: ") + NarrowAscii(m_lastErrorMessage)); } } if (!m_screenshotPath.empty()) { const std::filesystem::path screenshotPath = ResolveNearExecutable(m_screenshotPath); if (!screenshotPath.parent_path().empty()) { std::filesystem::create_directories(screenshotPath.parent_path()); } AppendTrace("RenderFrame: Capture screenshot"); D3D12Screenshot::Capture( m_device, m_commandQueue, backBuffer, screenshotPath.string().c_str()); AppendTrace("RenderFrame: Capture screenshot done"); } m_commandAllocator.Reset(); m_commandList.Reset(); m_commandList.TransitionBarrier(backBuffer.GetResource(), ResourceStates::RenderTarget, ResourceStates::Present); m_commandList.Close(); void* presentCommandLists[] = { &m_commandList }; AppendTrace("RenderFrame: execute final present-transition list"); m_commandQueue.ExecuteCommandLists(1, presentCommandLists); } else { m_commandList.TransitionBarrier(backBuffer.GetResource(), ResourceStates::RenderTarget, ResourceStates::Present); m_commandList.Close(); void* commandLists[] = { &m_commandList }; AppendTrace("RenderFrame: execute+present"); m_commandQueue.ExecuteCommandLists(1, commandLists); m_swapChain.Present(1, 0); } if (sortDebugEnabled) { sortDebugOrderReadbackBuffer.Shutdown(); sortDebugKeyReadbackBuffer.Shutdown(); } if (pass3PassHistogramDebugEnabled) { pass3PassHistogramReadbackBuffer.Shutdown(); } if (pass3PreScanGlobalHistogramDebugEnabled) { pass3PreScanGlobalHistogramReadbackBuffer.Shutdown(); } m_hasRenderedAtLeastOneFrame = true; AppendTrace("RenderFrame: end"); } } // namespace XC3DGSD3D12