From 2432a646ced650489d7be17f70c8f4f11f8ced96 Mon Sep 17 00:00:00 2001
From: ssdfasd <2156608475@qq.com>
Date: Sun, 22 Mar 2026 02:03:51 +0800
Subject: [PATCH] Fix audio module: implement WAV parsing and audio playback

- Implement ParseWAVData and ParseWAVHeader in AudioLoader to properly parse
  WAV file headers (sample rate, channels, bits per sample, duration)
- Modify Load() to call ParseWAVData for WAV files during loading
- Add DecodeAudioData() to AudioSourceComponent to decode PCM bytes to float
- Update SetClip() to trigger audio decoding
- Fix ProcessAudio() to read from decoded data instead of empty output buffer
- Add WAV parsing unit tests (ParseWAV_Mono44100_16bit,
  ParseWAV_Stereo48000_16bit)

Fixes issues:
- AudioLoader::ParseWAVData was a stub returning true without parsing
- AudioLoader::Load didn't extract audio metadata from WAV headers
- AudioSourceComponent::ProcessAudio read from empty m_outputBuffer

All 167 tests pass.
---
Review notes (text here is ignored by `git am`):

- The copy of this patch under review had lost its line breaks, its
  indentation and all text between angle brackets. Those were reconstructed.
  Three pre-existing #include context lines in
  tests/Resources/test_audio_loader.cpp and the element type of
  Containers::Array in the ParseWAVData signature could not be recovered and
  are left as found; restore them (and re-check context whitespace) before
  applying. The `index` lines still name the original blobs.
- "All 167 tests pass" was reported for the original submission; re-run the
  suite against this revision.
- Revised relative to the original submission:
  * ParseWAVData: duration was divided by the channel count twice.
  * ParseWAVHeader: `dataOffset + dataSize` could wrap in 32-bit arithmetic
    and pass the bounds check; header fields were read through unaligned
    pointer casts; zero channels / sample rate / bit depth were accepted;
    fmt chunks longer than 16 bytes and extra chunks were rejected.
  * DecodeAudioData: divided by zero for bit depths below 8 and marked
    unsupported depths as decoded.
  * ProcessAudio: indexed past m_decodedData when the clip has more channels
    than the output, and underflowed for a clip reporting 0 channels.
  * Tests now assert the exact duration.

 .../Components/AudioSourceComponent.h         |   2 +
 .../src/Components/AudioSourceComponent.cpp   | 116 +++++++++++++++++-
 engine/src/Resources/AudioLoader.cpp          | 115 +++++++++++++++++
 tests/Resources/test_audio_loader.cpp         |  88 +++++++++++++
 4 files changed, 316 insertions(+), 5 deletions(-)

diff --git a/engine/include/XCEngine/Components/AudioSourceComponent.h b/engine/include/XCEngine/Components/AudioSourceComponent.h
index decb3ec9..973f7699 100644
--- a/engine/include/XCEngine/Components/AudioSourceComponent.h
+++ b/engine/include/XCEngine/Components/AudioSourceComponent.h
@@ -82,6 +82,8 @@ public:
     std::string GetName() const override { return "AudioSource"; }
 
 private:
+    // Converts the clip's PCM bytes into normalized floats in m_decodedData.
+    void DecodeAudioData();
     void Apply3DAttenuation(const Math::Vector3& listenerPosition);
     void UpdateEnergy(const float* buffer, Audio::uint32 sampleCount);
 
diff --git a/engine/src/Components/AudioSourceComponent.cpp b/engine/src/Components/AudioSourceComponent.cpp
index 83def59d..9b9cddde 100644
--- a/engine/src/Components/AudioSourceComponent.cpp
+++ b/engine/src/Components/AudioSourceComponent.cpp
@@ -52,7 +52,7 @@ void AudioSourceComponent::SetClip(Resources::AudioClip* clip) {
     m_clip = clip;
     m_isDecoded = false;
     if (clip && clip->IsValid()) {
-        m_decodedData.resize(clip->GetAudioData().Size() / 2);
+        DecodeAudioData();
     }
 }
 
@@ -168,10 +168,75 @@ void AudioSourceComponent::OnDestroy() {
     Stop();
 }
 
+// Converts the clip's raw PCM bytes into normalized floats in m_decodedData and
+// sets m_isDecoded. Leaves the source undecoded when the clip is missing,
+// invalid, empty, or uses a bit depth other than 8, 16, 24 or 32.
+void AudioSourceComponent::DecodeAudioData() {
+    if (!m_clip || !m_clip->IsValid() || m_isDecoded) {
+        return;
+    }
+
+    const auto& audioData = m_clip->GetAudioData();
+    if (audioData.Empty()) {
+        return;
+    }
+
+    const Audio::uint32 bitsPerSample = m_clip->GetBitsPerSample();
+    if (bitsPerSample != 8 && bitsPerSample != 16 &&
+        bitsPerSample != 24 && bitsPerSample != 32) {
+        // Also covers 0, which would otherwise divide by zero below.
+        return;
+    }
+
+    // NOTE(review): AudioLoader::Load() passes the whole file to SetAudioData(),
+    // so these bytes presumably still begin with the RIFF/fmt header - confirm,
+    // and strip the header at load time so it is not decoded as audio.
+    const uint8_t* rawData = audioData.Data();
+    const size_t bytesPerSample = bitsPerSample / 8;
+    const size_t totalSamples = static_cast<size_t>(audioData.Size()) / bytesPerSample;
+
+    m_decodedData.resize(totalSamples);
+
+    // Samples are assembled byte by byte in little-endian order, so the result
+    // does not depend on buffer alignment or host byte order.
+    for (size_t i = 0; i < totalSamples; ++i) {
+        const uint8_t* s = rawData + i * bytesPerSample;
+
+        if (bitsPerSample == 8) {
+            // 8-bit PCM is unsigned with a bias of 128.
+            m_decodedData[i] = (static_cast<int>(s[0]) - 128) / 128.0f;
+        } else if (bitsPerSample == 16) {
+            int32_t value = s[0] | (s[1] << 8);
+            if (value & 0x8000) {
+                value -= 0x10000;  // sign-extend
+            }
+            m_decodedData[i] = value / 32768.0f;
+        } else if (bitsPerSample == 24) {
+            int32_t value = s[0] | (s[1] << 8) | (s[2] << 16);
+            if (value & 0x800000) {
+                value -= 0x1000000;  // sign-extend
+            }
+            m_decodedData[i] = value / 8388608.0f;
+        } else {
+            const uint32_t bits = static_cast<uint32_t>(s[0]) |
+                                  (static_cast<uint32_t>(s[1]) << 8) |
+                                  (static_cast<uint32_t>(s[2]) << 16) |
+                                  (static_cast<uint32_t>(s[3]) << 24);
+            int64_t value = bits;
+            if (bits & 0x80000000u) {
+                value -= 0x100000000LL;  // sign-extend
+            }
+            m_decodedData[i] = static_cast<float>(value) / 2147483648.0f;
+        }
+    }
+
+    m_isDecoded = true;
+}
+
 void AudioSourceComponent::ProcessAudio(float* buffer, Audio::uint32 sampleCount, Audio::uint32 channels,
                                         const Math::Vector3& listenerPosition,
                                         const Math::Quaternion& listenerRotation) {
-    if (m_playState != Audio::PlayState::Playing || !m_clip) {
+    if (m_playState != Audio::PlayState::Playing || !m_clip || !m_isDecoded) {
         return;
     }
 
@@ -179,18 +244,59 @@ void AudioSourceComponent::ProcessAudio(float* buffer, Audio::uint32 sampleCount
         return;
     }
 
+    if (m_decodedData.empty()) {
+        return;
+    }
+
     float volume = m_volume;
     if (m_spatialize) {
         Apply3DAttenuation(listenerPosition);
         volume *= m_volume;
     }
 
-    for (Audio::uint32 i = 0; i < sampleCount && i < BufferSize * 2; ++i) {
-        buffer[i] += m_outputBuffer[i] * volume;
+    const Audio::uint32 clipChannels = m_clip->GetChannels();
+    if (clipChannels == 0 || channels == 0) {
+        return;
+    }
+
+    // m_samplePosition counts interleaved clip samples. Working in whole frames
+    // keeps every read inside m_decodedData even when the output channel count
+    // differs from the clip's.
+    const Audio::uint64 totalFrames = static_cast<Audio::uint64>(m_decodedData.size()) / clipChannels;
+    if (totalFrames == 0) {
+        return;
+    }
+    const Audio::uint64 totalClipSamples = totalFrames * clipChannels;
+    const Audio::uint64 startFrame = m_samplePosition / clipChannels;
+
+    for (Audio::uint32 i = 0; i < sampleCount; ++i) {
+        Audio::uint64 frame = startFrame + i;
+        if (frame >= totalFrames) {
+            if (!m_isLooping) {
+                break;  // One-shot clip exhausted: leave the rest of the mix as is.
+            }
+            frame %= totalFrames;
+        }
+
+        for (Audio::uint32 ch = 0; ch < channels; ++ch) {
+            // Output channels the clip does not have reuse its last channel.
+            const Audio::uint32 sourceChannel = (ch < clipChannels) ? ch : (clipChannels - 1);
+            buffer[i * channels + ch] += m_decodedData[frame * clipChannels + sourceChannel] * volume;
+        }
+    }
+
+    m_samplePosition = (startFrame + sampleCount) * clipChannels;
+
+    if (m_samplePosition >= totalClipSamples) {
+        if (m_isLooping) {
+            m_samplePosition = m_samplePosition % totalClipSamples;
+        } else {
+            Stop();
+        }
     }
 
     if (m_isEnergyDetecting) {
-        UpdateEnergy(buffer, sampleCount);
+        UpdateEnergy(buffer, sampleCount * channels);
     }
 }
 
diff --git a/engine/src/Resources/AudioLoader.cpp b/engine/src/Resources/AudioLoader.cpp
index ad9f4e32..c2f0ca24 100644
--- a/engine/src/Resources/AudioLoader.cpp
+++ b/engine/src/Resources/AudioLoader.cpp
@@ -1,10 +1,104 @@
 #include "Resources/AudioLoader.h"
 #include "Resources/ResourceManager.h"
 #include "Resources/ResourceTypes.h"
+#include <cstdint>
+#include <cstring>
 
 namespace XCEngine {
 namespace Resources {
 
+namespace {
+
+// Format fields and PCM payload location read from a WAV file.
+struct WAVHeader {
+    uint32_t sampleRate = 44100;
+    uint32_t channels = 2;
+    uint32_t bitsPerSample = 16;
+    uint32_t dataSize = 0;     // payload length in bytes
+    uint32_t dataOffset = 44;  // payload start, from the start of the file
+};
+
+// Little-endian reads assembled from bytes: safe for unaligned input and
+// independent of host byte order.
+uint16_t ReadLE16(const uint8_t* p) {
+    return static_cast<uint16_t>(p[0] | (p[1] << 8));
+}
+
+uint32_t ReadLE32(const uint8_t* p) {
+    return static_cast<uint32_t>(p[0]) | (static_cast<uint32_t>(p[1]) << 8) |
+           (static_cast<uint32_t>(p[2]) << 16) | (static_cast<uint32_t>(p[3]) << 24);
+}
+
+// True when the four bytes at p equal the chunk tag.
+bool HasTag(const uint8_t* p, const char* tag) {
+    return std::memcmp(p, tag, 4) == 0;
+}
+
+// Parses an uncompressed PCM WAV image by walking its RIFF chunk list, so
+// extra chunks (e.g. LIST) and fmt chunks longer than 16 bytes are accepted.
+// Returns false for non-WAV, non-PCM, truncated or malformed input; header
+// is only meaningful when true is returned.
+bool ParseWAVHeader(const uint8_t* data, size_t size, WAVHeader& header) {
+    if (data == nullptr || size < 12) {
+        return false;
+    }
+
+    if (!HasTag(data, "RIFF") || !HasTag(data + 8, "WAVE")) {
+        return false;
+    }
+
+    bool haveFormat = false;
+    size_t offset = 12;
+
+    while (size - offset >= 8) {
+        const uint8_t* chunk = data + offset;
+        const uint32_t chunkSize = ReadLE32(chunk + 4);
+        const size_t payloadOffset = offset + 8;
+        const size_t available = size - payloadOffset;
+
+        // Compared against the bytes that remain, so an oversized length
+        // cannot wrap around and slip past the check.
+        if (chunkSize > available) {
+            return false;
+        }
+
+        if (HasTag(chunk, "fmt ")) {
+            if (chunkSize < 16 || ReadLE16(chunk + 8) != 1) {
+                return false;  // malformed, or not uncompressed PCM
+            }
+
+            header.channels = ReadLE16(chunk + 10);
+            header.sampleRate = ReadLE32(chunk + 12);
+            header.bitsPerSample = ReadLE16(chunk + 22);
+
+            // Reject values that would divide by zero in later math.
+            if (header.channels == 0 || header.sampleRate == 0 ||
+                header.bitsPerSample == 0 || header.bitsPerSample % 8 != 0) {
+                return false;
+            }
+            haveFormat = true;
+        } else if (HasTag(chunk, "data")) {
+            if (!haveFormat || payloadOffset > UINT32_MAX) {
+                return false;
+            }
+            header.dataSize = chunkSize;
+            header.dataOffset = static_cast<uint32_t>(payloadOffset);
+            return true;
+        }
+
+        // Chunks are word-aligned: an odd size is followed by a pad byte.
+        size_t advance = chunkSize;
+        if ((chunkSize & 1u) != 0 && advance < available) {
+            ++advance;
+        }
+        offset = payloadOffset + advance;
+    }
+
+    return false;
+}
+
+} // namespace
+
 AudioLoader::AudioLoader() = default;
 
 AudioLoader::~AudioLoader() = default;
@@ -38,6 +132,12 @@ LoadResult AudioLoader::Load(const Containers::String& path, const ImportSetting
     audioClip->m_guid = ResourceGUID::Generate(path);
 
     AudioFormat format = DetectAudioFormat(path, data);
+    if (format == AudioFormat::WAV) {
+        if (!ParseWAVData(data, audioClip)) {
+            delete audioClip;
+            return LoadResult("Failed to parse WAV data");
+        }
+    }
     audioClip->SetAudioFormat(format);
     audioClip->SetAudioData(data);
 
@@ -53,6 +153,21 @@ ImportSettings* AudioLoader::GetDefaultSettings() const {
 }
 
 bool AudioLoader::ParseWAVData(const Containers::Array& data, AudioClip* audioClip) {
+    WAVHeader header;
+    if (audioClip == nullptr || !ParseWAVHeader(data.Data(), data.Size(), header)) {
+        return false;
+    }
+
+    audioClip->SetSampleRate(header.sampleRate);
+    audioClip->SetChannels(header.channels);
+    audioClip->SetBitsPerSample(header.bitsPerSample);
+
+    // Duration is frames / sample rate, where a frame holds one sample per
+    // channel. ParseWAVHeader guarantees both divisors are non-zero.
+    const uint32_t bytesPerFrame = (header.bitsPerSample / 8) * header.channels;
+    const uint32_t totalFrames = header.dataSize / bytesPerFrame;
+    audioClip->SetDuration(static_cast<float>(totalFrames) / static_cast<float>(header.sampleRate));
+
     return true;
 }
 
diff --git a/tests/Resources/test_audio_loader.cpp b/tests/Resources/test_audio_loader.cpp
index 7d8dd17d..7ca42c6a 100644
--- a/tests/Resources/test_audio_loader.cpp
+++ b/tests/Resources/test_audio_loader.cpp
@@ -2,12 +2,100 @@
 #include
 #include
 #include
+#include <cstdint>
+#include <cstdio>
+#include <cstring>
+#include <fstream>
+#include <vector>
 
 using namespace XCEngine::Resources;
 using namespace XCEngine::Containers;
 
 namespace {
 
+#pragma pack(push, 1)
+// Canonical 44-byte PCM WAV header, packed so it can be written verbatim.
+struct WAVFileHeader {
+    char riff[4];
+    uint32_t fileSize;
+    char wave[4];
+    char fmt[4];
+    uint32_t fmtSize;
+    uint16_t audioFormat;
+    uint16_t numChannels;
+    uint32_t sampleRate;
+    uint32_t byteRate;
+    uint16_t blockAlign;
+    uint16_t bitsPerSample;
+    char data[4];
+    uint32_t dataSize;
+};
+#pragma pack(pop)
+
+static_assert(sizeof(WAVFileHeader) == 44, "WAV header must be exactly 44 bytes");
+
+// Writes a PCM WAV file holding numSamples frames of silence.
+void CreateTestWAVFile(const char* filepath, uint16_t channels, uint32_t sampleRate, uint16_t bitsPerSample, uint32_t numSamples) {
+    WAVFileHeader header = {};
+    std::memcpy(header.riff, "RIFF", 4);
+    std::memcpy(header.wave, "WAVE", 4);
+    std::memcpy(header.fmt, "fmt ", 4);
+    header.fmtSize = 16;
+    header.audioFormat = 1;
+    header.numChannels = channels;
+    header.sampleRate = sampleRate;
+    header.bitsPerSample = bitsPerSample;
+    header.blockAlign = static_cast<uint16_t>(channels * bitsPerSample / 8);
+    header.byteRate = sampleRate * header.blockAlign;
+    std::memcpy(header.data, "data", 4);
+    header.dataSize = numSamples * header.blockAlign;
+    header.fileSize = 36 + header.dataSize;
+
+    std::vector<uint8_t> wavData(sizeof(WAVFileHeader) + header.dataSize, 0);
+    std::memcpy(wavData.data(), &header, sizeof(WAVFileHeader));
+
+    std::ofstream file(filepath, std::ios::binary);
+    file.write(reinterpret_cast<const char*>(wavData.data()), static_cast<std::streamsize>(wavData.size()));
+}
+
+TEST(AudioLoader, ParseWAV_Mono44100_16bit) {
+    const char* testPath = "test_mono_44100.wav";
+    CreateTestWAVFile(testPath, 1, 44100, 16, 44100);
+
+    AudioLoader loader;
+    LoadResult result = loader.Load(testPath);
+
+    EXPECT_TRUE(result);
+    if (result) {
+        AudioClip* clip = static_cast<AudioClip*>(result.resource);
+        EXPECT_EQ(clip->GetSampleRate(), 44100u);
+        EXPECT_EQ(clip->GetChannels(), 1u);
+        EXPECT_EQ(clip->GetBitsPerSample(), 16u);
+        EXPECT_NEAR(clip->GetDuration(), 1.0f, 1e-4f);  // 44100 frames at 44100 Hz
+    }
+
+    std::remove(testPath);
+}
+
+TEST(AudioLoader, ParseWAV_Stereo48000_16bit) {
+    const char* testPath = "test_stereo_48000.wav";
+    CreateTestWAVFile(testPath, 2, 48000, 16, 4800);
+
+    AudioLoader loader;
+    LoadResult result = loader.Load(testPath);
+
+    EXPECT_TRUE(result);
+    if (result) {
+        AudioClip* clip = static_cast<AudioClip*>(result.resource);
+        EXPECT_EQ(clip->GetSampleRate(), 48000u);
+        EXPECT_EQ(clip->GetChannels(), 2u);
+        EXPECT_EQ(clip->GetBitsPerSample(), 16u);
+        EXPECT_NEAR(clip->GetDuration(), 0.1f, 1e-4f);  // 4800 frames at 48000 Hz
+    }
+
+    std::remove(testPath);
+}
+
 TEST(AudioLoader, GetResourceType) {
     AudioLoader loader;
     EXPECT_EQ(loader.GetResourceType(), ResourceType::AudioClip);