diff --git a/docs/plan/Audio模块架构最佳实践重构计划_2026-04-14.md b/docs/plan/Audio模块架构最佳实践重构计划_2026-04-14.md new file mode 100644 index 00000000..5fd5754d --- /dev/null +++ b/docs/plan/Audio模块架构最佳实践重构计划_2026-04-14.md @@ -0,0 +1,216 @@ +# Audio 模块架构最佳实践重构计划 +日期:2026-04-14 + +## 1. 目标 + +本轮不是继续给当前 Audio 模块叠功能,而是把它从“可用的简化运行时”推进到“可扩展、可回归、契约清晰的正式架构”。 + +本计划聚焦以下最佳实践差距: + +1. 混音主链仍是 `game thread push -> backend pending buffer` +2. `AudioSourceComponent` 持有实例级解码 PCM,资源复用模型不对 +3. mixer / effect / routing 仍是裸指针图,ownership 不清晰 +4. backend API 能力声明与实际实现不一致 +5. master gain / mute 的职责落点分裂 +6. 混音路径仍有较多临时分配,RT-safe 程度不足 + +## 2. 本轮边界 + +本轮做: + +1. 先把资源层与 source/voice 层边界做对 +2. 先把 mixer graph 的 ownership 和路由契约做清楚 +3. 先把 backend 抽象语义收口到“说什么就真支持什么” +4. 先把主线程混音路径中的明显重复分配和重复解码收掉 +5. 为后续“真正的音频线程拉取式渲染”铺接口和数据结构基础 + +本轮不直接做: + +1. 不在第一步就推倒重写成完整 callback renderer +2. 不先上 streaming / bank / snapshot / event system +3. 不先扩平台后端 +4. 不把 editor 可视化音频图一起塞进本轮 + +## 3. 重构分期 + +### Phase 0:冻结当前外部行为 + +目标: +先保证现有运行时行为有回归保护,避免架构重构时把已有能力打坏。 + +执行项: + +1. 盘点现有 Audio 单测覆盖范围 +2. 明确现阶段保留行为: + `Play / Pause / Stop / Loop / Seek / Spatial Pan / HRTF / Reverb Send / Mixer Route` +3. 记录当前对外契约: + `AudioClip -> AudioSourceComponent -> AudioSystem -> IAudioBackend` + +完成标准: + +1. 关键行为都有最小回归测试入口 +2. 后续阶段改动不需要靠人工听感判断是否回归 + +### Phase 1:重做资源复用模型 + +目标: +把“音频资源”和“播放实例”彻底拆开。 + +执行项: + +1. 将 decoded float PCM 从 `AudioSourceComponent` 下沉到 `AudioClip` +2. 让 `AudioClip` 负责: + PCM 原始字节 + 派生元数据 + 共享 decoded float cache +3. 让 `AudioSourceComponent` 只保留: + clip 引用 + 播放游标 + voice 参数 + 空间化参数 +4. 统一缓存失效策略: + PCM / channels / bitsPerSample / sampleRate 改动时,duration 与 decoded cache 一起刷新 +5. 补资源层测试,覆盖 decoded cache 和派生元数据刷新 + +完成标准: + +1. 同一 `AudioClip` 被多个 source 使用时不再重复解码 +2. source 不再持有整份 clip 的 float PCM 副本 +3. `AudioClip` 的 duration / frameCount / sampleCount / decoded cache 语义一致 + +当前状态: +`进行中,本轮先执行这一阶段。` + +### Phase 2:收口 mixer graph ownership + +目标: +把当前“裸指针路由”收口成显式可管理的图结构。 + +执行项: + +1. 为 mixer node 建立明确 owner +2. 明确 source 输出路由、listener reverb send、mixer output 的生命周期规则 +3. 禁止悬挂 graph node 被继续访问 +4. 为未来 editor/runtime graph 统一预留 handle 或 registry 入口 + +完成标准: + +1. graph 生命周期可推理 +2. scene 切换与对象销毁不会留下悬挂路由 +3. mixer routing 可以被测试验证 + +### Phase 3:统一控制语义与 backend 能力 + +目标: +去掉“同一语义多处实现”的问题。 + +执行项: + +1. 统一 master volume / mute 的唯一 owner +2. backend 只保留设备与提交职责 +3. 移除误导性的 `WASAPIBackend` 别名或改成真实实现 +4. 明确 `SetDevice()` 是否真支持热切换;如果不支持,就不要暴露伪能力 + +完成标准: + +1. gain staging 只有一套主语义 +2. backend 能力声明与实现一致 + +### Phase 4:从主线程推送过渡到音频线程拉取 + +目标: +把当前 `Update()` 主导混音的模型,重构为更接近实时音频最佳实践的 render path。 + +执行项: + +1. 从 `AudioSystem` 中拆出音频渲染上下文 +2. 让 backend 线程按设备需求拉取 render block +3. 把游戏线程职责收缩到: + 发布 source/listener 状态 + 提交控制参数 + 不直接承担设备节奏的最终混音责任 +4. 明确 block size、latency、underrun fallback 行为 + +完成标准: + +1. 设备消费节奏不再依赖游戏帧节奏 +2. render 线程契约独立成立 + +### Phase 5:RT-safe 清理 + +目标: +减少实时路径上的临时分配和不可控开销。 + +执行项: + +1. 缓存 `AudioSystem` 混音 scratch buffer +2. 减少 `unordered_map` / `vector` 的逐帧临时构建 +3. 把 `Equalizer` / `Reverbation` / `FFTFilter` 的临时缓冲改成复用型工作区 +4. 明确哪些 DSP 允许进实时渲染链,哪些只能做分析 + +完成标准: + +1. 混音热路径不再做明显重复分配 +2. DSP 链路更接近 RT-safe + +### Phase 6:回归测试与阶段收口 + +目标: +让架构重构具备真正的落地闭环。 + +执行项: + +1. 补充资源共享、graph 生命周期、backend 语义、render 路径相关测试 +2. 补阶段性文档 +3. 按阶段提交 git commit,并在每个稳定节点推送 + +完成标准: + +1. 每一阶段都有对应提交点 +2. 关键行为和关键架构约束都有自动化保护 + +## 4. 执行顺序 + +按优先级执行: + +1. `Phase 1` 资源复用模型 +2. `Phase 3` backend 与控制语义收口 +3. `Phase 2` mixer graph ownership +4. `Phase 5` RT-safe 清理 +5. `Phase 4` 音频线程拉取式渲染 +6. `Phase 6` 收口与验证贯穿全程 + +原因: + +1. `Phase 1` 改动收益大、风险低、最容易稳定落地 +2. `Phase 3` 能先消掉接口层误导 +3. `Phase 2` 需要建立在边界先清楚的前提下 +4. `Phase 4` 是最大手术,必须放到前置契约稳定之后 + +## 5. 当前这一步准备执行的内容 + +第一批落地项: + +1. 新增 `AudioClip` 共享 decoded float cache +2. 移除 `AudioSourceComponent` 的实例级 `m_decodedData` +3. 统一 `AudioClip` 派生数据失效与刷新逻辑 +4. 补资源层和 source 层测试 + +这一步完成后,Audio 模块会先从“每个 source 自带一份解码副本”进入“资源共享 + 播放实例分离”的正确方向。 + +## 6. 阶段性提交策略 + +每阶段至少形成一次独立提交: + +1. `phase1/audio-clip-shared-decoded-cache` +2. `phase3/backend-contract-cleanup` +3. `phase2/mixer-graph-ownership` +4. `phase5/rt-safe-buffer-reuse` +5. `phase4/audio-thread-pull-render` + +执行要求: + +1. 每次只提交一个可回归的小阶段 +2. 提交前先跑对应最小测试集 +3. 阶段完成后及时推送,避免大堆积 + diff --git a/engine/include/XCEngine/Components/AudioSourceComponent.h b/engine/include/XCEngine/Components/AudioSourceComponent.h index db02065c..9f234da9 100644 --- a/engine/include/XCEngine/Components/AudioSourceComponent.h +++ b/engine/include/XCEngine/Components/AudioSourceComponent.h @@ -2,7 +2,10 @@ #include #include +#include #include +#include +#include #include #include #include @@ -30,6 +33,10 @@ public: void SetClip(Resources::AudioClip* clip); Resources::AudioClip* GetClip() const { return m_clip; } + void SetClipPath(const std::string& clipPath); + void ClearClip(); + const std::string& GetClipPath() const { return m_clipPath; } + const Resources::AssetRef& GetClipAssetRef() const { return m_clipRef; } void SetVolume(float volume); float GetVolume() const { return m_volume; } @@ -40,6 +47,13 @@ public: void SetPan(float pan); float GetPan() const { return m_pan; } + void SetHRTFEnabled(bool enabled); + bool IsHRTFEnabled() const { return m_useHRTF; } + void SetHRTFCrossFeed(float crossFeed); + float GetHRTFCrossFeed() const { return m_hrtf.GetCrossFeed(); } + void SetHRTFQuality(Audio::uint32 level); + Audio::uint32 GetHRTFQuality() const { return m_hrtf.GetQualityLevel(); } + void SetLooping(bool loop); bool IsLooping() const { return m_isLooping; } @@ -74,20 +88,34 @@ public: void OnEnable() override; void OnDisable() override; void OnDestroy() override; + void Serialize(std::ostream& os) const override; + void Deserialize(std::istream& is) override; - void ProcessAudio(float* buffer, Audio::uint32 sampleCount, Audio::uint32 channels, + void ProcessAudio(float* buffer, Audio::uint32 frameCount, Audio::uint32 channels, const Math::Vector3& listenerPosition, - const Math::Quaternion& listenerRotation); + const Math::Quaternion& listenerRotation, + const Math::Vector3& listenerVelocity = Math::Vector3::Zero(), + float listenerDopplerLevel = 1.0f, + float speedOfSound = 343.0f, + Audio::uint32 outputSampleRate = 0); std::string GetName() const override { return "AudioSource"; } private: - void DecodeAudioData(); - void Apply3DAttenuation(const Math::Vector3& listenerPosition); + float Compute3DAttenuation(const Math::Vector3& listenerPosition) const; + float ComputeSpatialPan(const Math::Vector3& listenerPosition, + const Math::Quaternion& listenerRotation) const; + double ComputeDopplerFactor(const Math::Vector3& listenerPosition, + const Math::Vector3& listenerVelocity, + float listenerDopplerLevel, + float speedOfSound) const; void UpdateEnergy(const float* buffer, Audio::uint32 sampleCount); private: + Resources::ResourceHandle m_clipHandle; Resources::AudioClip* m_clip = nullptr; + std::string m_clipPath; + Resources::AssetRef m_clipRef; Audio::AudioMixer* m_outputMixer = nullptr; Audio::PlayState m_playState = Audio::PlayState::Stopped; @@ -97,21 +125,22 @@ private: float m_pitch = 1.0f; float m_pan = 0.0f; bool m_spatialize = true; + bool m_useHRTF = false; Audio::Audio3DParams m_3DParams; + Audio::HRTF m_hrtf; Audio::uint64 m_samplePosition = 0; + double m_playbackPosition = 0.0; double m_lastingTime = 0.0; + Math::Vector3 m_velocity = Math::Vector3::Zero(); + Math::Vector3 m_lastPosition = Math::Vector3::Zero(); + bool m_hasLastPosition = false; bool m_isEnergyDetecting = false; float m_energy = 0.0f; float m_maxEnergy = 5.0f; std::deque m_energyHistory; - - static constexpr size_t BufferSize = 8192; - std::vector m_outputBuffer; - std::vector m_decodedData; - bool m_isDecoded = false; }; } // namespace Components diff --git a/engine/include/XCEngine/Resources/AudioClip/AudioClip.h b/engine/include/XCEngine/Resources/AudioClip/AudioClip.h index e3ffb50e..07e3acef 100644 --- a/engine/include/XCEngine/Resources/AudioClip/AudioClip.h +++ b/engine/include/XCEngine/Resources/AudioClip/AudioClip.h @@ -3,6 +3,7 @@ #include #include #include +#include namespace XCEngine { namespace Resources { @@ -35,20 +36,31 @@ public: size_t GetMemorySize() const override { return m_memorySize; } void Release() override; - void SetAudioData(const Containers::Array& data); - const Containers::Array& GetAudioData() const { return m_audioData; } + void SetPCMData(const Containers::Array& data); + const Containers::Array& GetPCMData() const { return m_pcmData; } + size_t GetPCMDataSize() const { return m_pcmData.Size(); } + + // Legacy compatibility: audio data now means decoded/interpretable PCM bytes. + void SetAudioData(const Containers::Array& data) { SetPCMData(data); } + const Containers::Array& GetAudioData() const { return GetPCMData(); } - void SetSampleRate(Core::uint32 rate) { m_sampleRate = rate; } + void SetSampleRate(Core::uint32 rate); Core::uint32 GetSampleRate() const { return m_sampleRate; } - void SetChannels(Core::uint32 channels) { m_channels = channels; } + void SetChannels(Core::uint32 channels); Core::uint32 GetChannels() const { return m_channels; } - void SetBitsPerSample(Core::uint32 bits) { m_bitsPerSample = bits; } + void SetBitsPerSample(Core::uint32 bits); Core::uint32 GetBitsPerSample() const { return m_bitsPerSample; } void SetDuration(float seconds) { m_duration = seconds; } float GetDuration() const { return m_duration; } + + const std::vector& GetDecodedPCMData() const; + bool HasDecodedPCMData() const { return m_decodedPCMValid; } + + Core::uint64 GetFrameCount() const; + Core::uint64 GetSampleCount() const; void SetAudioFormat(AudioFormat format) { m_format = format; } AudioFormat GetAudioFormat() const { return m_format; } @@ -66,7 +78,14 @@ public: void SetRHIResource(class IRHIAudioBuffer* resource); private: - Containers::Array m_audioData; + void RefreshDerivedData(); + void RefreshMemorySize(); + void InvalidateDecodedPCMData(); + void BuildDecodedPCMData() const; + + Containers::Array m_pcmData; + mutable std::vector m_decodedPCMData; + mutable bool m_decodedPCMValid = false; Core::uint32 m_sampleRate = 44100; Core::uint32 m_channels = 2; diff --git a/engine/src/Components/AudioSourceComponent.cpp b/engine/src/Components/AudioSourceComponent.cpp index 9b9cddde..8df7eb71 100644 --- a/engine/src/Components/AudioSourceComponent.cpp +++ b/engine/src/Components/AudioSourceComponent.cpp @@ -1,16 +1,153 @@ #include #include +#include +#include #include #include +#include namespace XCEngine { namespace Components { -AudioSourceComponent::AudioSourceComponent() - : m_outputBuffer(BufferSize * 2, 0.0f) -{ +namespace { + +std::string ToStdString(const Containers::String& value) { + return std::string(value.CStr()); } +bool HasVirtualPathScheme(const std::string& path) { + return path.find("://") != std::string::npos; +} + +std::string EncodeAssetRef(const Resources::AssetRef& assetRef) { + if (!assetRef.IsValid()) { + return std::string(); + } + + return ToStdString(assetRef.assetGuid.ToString()) + "," + + std::to_string(assetRef.localID) + "," + + std::to_string(static_cast(assetRef.resourceType)); +} + +bool TryDecodeAssetRef(const std::string& value, Resources::AssetRef& outRef) { + const size_t firstComma = value.find(','); + const size_t secondComma = + firstComma == std::string::npos ? std::string::npos : value.find(',', firstComma + 1); + if (firstComma == std::string::npos || secondComma == std::string::npos) { + return false; + } + + outRef.assetGuid = Resources::AssetGUID::ParseOrDefault( + Containers::String(value.substr(0, firstComma).c_str())); + outRef.localID = static_cast(std::stoull( + value.substr(firstComma + 1, secondComma - firstComma - 1))); + outRef.resourceType = static_cast(std::stoi( + value.substr(secondComma + 1))); + return outRef.IsValid(); +} + +double WrapFramePosition(double framePosition, Audio::uint64 totalFrames) { + if (totalFrames == 0) { + return 0.0; + } + + const double totalFrameCount = static_cast(totalFrames); + double wrapped = std::fmod(framePosition, totalFrameCount); + if (wrapped < 0.0) { + wrapped += totalFrameCount; + } + return wrapped; +} + +float ReadDecodedSample(const std::vector& decodedData, + Audio::uint64 totalFrames, + Audio::uint32 clipChannels, + Audio::int64 frameIndex, + Audio::uint32 channel, + bool looping) { + if (decodedData.empty() || totalFrames == 0 || clipChannels == 0) { + return 0.0f; + } + + Audio::int64 resolvedFrame = frameIndex; + if (looping) { + const Audio::int64 frameCount = static_cast(totalFrames); + resolvedFrame %= frameCount; + if (resolvedFrame < 0) { + resolvedFrame += frameCount; + } + } else if (resolvedFrame < 0 || resolvedFrame >= static_cast(totalFrames)) { + return 0.0f; + } + + const Audio::uint32 clipChannel = std::min(channel, clipChannels - 1); + const Audio::uint64 decodedIndex = + static_cast(resolvedFrame) * clipChannels + clipChannel; + return decodedData[decodedIndex]; +} + +float SampleDecodedChannel(const std::vector& decodedData, + Audio::uint64 totalFrames, + Audio::uint32 clipChannels, + double framePosition, + Audio::uint32 channel, + bool looping) { + if (decodedData.empty() || totalFrames == 0 || clipChannels == 0) { + return 0.0f; + } + + double samplePosition = framePosition; + if (looping) { + samplePosition = WrapFramePosition(samplePosition, totalFrames); + } else { + const double maxFrame = static_cast(totalFrames - 1); + samplePosition = std::clamp(samplePosition, 0.0, maxFrame); + } + + const Audio::int64 frame0 = static_cast(std::floor(samplePosition)); + const Audio::int64 frame1 = frame0 + 1; + const float t = static_cast(samplePosition - static_cast(frame0)); + + const float sample0 = ReadDecodedSample(decodedData, totalFrames, clipChannels, frame0, channel, looping); + const float sample1 = looping + ? ReadDecodedSample(decodedData, totalFrames, clipChannels, frame1, channel, true) + : ReadDecodedSample(decodedData, totalFrames, clipChannels, + std::min(frame1, static_cast(totalFrames - 1)), + channel, false); + return sample0 + (sample1 - sample0) * t; +} + +void ComputePanGains(float pan, float& leftGain, float& rightGain) { + const float clampedPan = std::clamp(pan, -1.0f, 1.0f); + leftGain = (clampedPan > 0.0f) ? (1.0f - clampedPan) : 1.0f; + rightGain = (clampedPan < 0.0f) ? (1.0f + clampedPan) : 1.0f; +} + +void ApplyStereoSpread(float leftIn, float rightIn, float spread, float& leftOut, float& rightOut) { + const float clampedSpread = std::clamp(spread, 0.0f, 1.0f); + const float mono = (leftIn + rightIn) * 0.5f; + leftOut = mono + (leftIn - mono) * clampedSpread; + rightOut = mono + (rightIn - mono) * clampedSpread; +} + +void ApplyPanToBuffer(float* buffer, Audio::uint32 frameCount, Audio::uint32 channels, float pan) { + if (buffer == nullptr || channels < 2) { + return; + } + + float leftGain = 1.0f; + float rightGain = 1.0f; + ComputePanGains(pan, leftGain, rightGain); + for (Audio::uint32 frame = 0; frame < frameCount; ++frame) { + buffer[frame * channels] *= leftGain; + buffer[frame * channels + 1] *= rightGain; + } +} + +} // namespace + +AudioSourceComponent::AudioSourceComponent() = default; + AudioSourceComponent::~AudioSourceComponent() { if (m_playState == Audio::PlayState::Playing) { Audio::AudioSystem::Get().UnregisterSource(this); @@ -22,13 +159,28 @@ void AudioSourceComponent::Play() { return; } + if (m_playState == Audio::PlayState::Playing) { + m_samplePosition = 0; + m_playbackPosition = 0.0; + m_lastingTime = 0.0; + m_velocity = Math::Vector3::Zero(); + m_hasLastPosition = false; + m_hrtf.ResetState(); + return; + } + if (m_playState == Audio::PlayState::Paused) { m_playState = Audio::PlayState::Playing; + Audio::AudioSystem::Get().RegisterSource(this); return; } m_samplePosition = 0; + m_playbackPosition = 0.0; m_lastingTime = 0.0; + m_velocity = Math::Vector3::Zero(); + m_hasLastPosition = false; + m_hrtf.ResetState(); m_playState = Audio::PlayState::Playing; Audio::AudioSystem::Get().RegisterSource(this); } @@ -44,18 +196,78 @@ void AudioSourceComponent::Stop(Audio::StopMode mode) { if (m_playState != Audio::PlayState::Stopped) { m_playState = Audio::PlayState::Stopped; m_samplePosition = 0; + m_playbackPosition = 0.0; + m_lastingTime = 0.0; + m_velocity = Math::Vector3::Zero(); + m_hasLastPosition = false; + m_hrtf.ResetState(); Audio::AudioSystem::Get().UnregisterSource(this); } } void AudioSourceComponent::SetClip(Resources::AudioClip* clip) { + m_clipHandle = Resources::ResourceHandle(clip); m_clip = clip; - m_isDecoded = false; + m_clipPath.clear(); + m_clipRef.Reset(); + m_samplePosition = 0; + m_playbackPosition = 0.0; + m_lastingTime = 0.0; + m_velocity = Math::Vector3::Zero(); + m_hasLastPosition = false; + m_hrtf.ResetState(); if (clip && clip->IsValid()) { - DecodeAudioData(); + const std::string clipPath = ToStdString(clip->GetPath()); + if (!clipPath.empty()) { + m_clipPath = clipPath; + if (!Resources::ResourceManager::Get().TryGetAssetRef( + m_clipPath.c_str(), + Resources::ResourceType::AudioClip, + m_clipRef)) { + m_clipRef.Reset(); + } + } + static_cast(m_clip->GetDecodedPCMData()); } } +void AudioSourceComponent::SetClipPath(const std::string& clipPath) { + m_clipRef.Reset(); + m_clipPath = clipPath; + if (!m_clipPath.empty() && + !Resources::ResourceManager::Get().TryGetAssetRef( + m_clipPath.c_str(), + Resources::ResourceType::AudioClip, + m_clipRef)) { + m_clipRef.Reset(); + } + + m_clipHandle.Reset(); + m_clip = nullptr; + m_samplePosition = 0; + m_playbackPosition = 0.0; + m_lastingTime = 0.0; + m_velocity = Math::Vector3::Zero(); + m_hasLastPosition = false; + m_hrtf.ResetState(); + + if (m_clipPath.empty()) { + return; + } + + m_clipHandle = Resources::ResourceManager::Get().Load(m_clipPath.c_str()); + m_clip = m_clipHandle.Get(); + if (m_clip != nullptr && m_clip->IsValid()) { + static_cast(m_clip->GetDecodedPCMData()); + } +} + +void AudioSourceComponent::ClearClip() { + m_clipRef.Reset(); + m_clipPath.clear(); + SetClip(nullptr); +} + void AudioSourceComponent::SetVolume(float volume) { m_volume = std::max(0.0f, std::min(1.0f, volume)); } @@ -68,6 +280,21 @@ void AudioSourceComponent::SetPan(float pan) { m_pan = std::max(-1.0f, std::min(1.0f, pan)); } +void AudioSourceComponent::SetHRTFEnabled(bool enabled) { + m_useHRTF = enabled; + if (!enabled) { + m_hrtf.ResetState(); + } +} + +void AudioSourceComponent::SetHRTFCrossFeed(float crossFeed) { + m_hrtf.SetCrossFeed(crossFeed); +} + +void AudioSourceComponent::SetHRTFQuality(Audio::uint32 level) { + m_hrtf.SetQualityLevel(level); +} + void AudioSourceComponent::SetLooping(bool loop) { m_isLooping = loop; } @@ -78,10 +305,17 @@ void AudioSourceComponent::SetSpatialize(bool spatialize) { void AudioSourceComponent::Set3DParams(const Audio::Audio3DParams& params) { m_3DParams = params; + m_3DParams.dopplerLevel = std::max(0.0f, m_3DParams.dopplerLevel); + m_3DParams.speedOfSound = std::max(1.0f, m_3DParams.speedOfSound); + m_3DParams.minDistance = std::max(0.0f, m_3DParams.minDistance); + m_3DParams.maxDistance = std::max(m_3DParams.minDistance, m_3DParams.maxDistance); + m_3DParams.panLevel = std::clamp(m_3DParams.panLevel, 0.0f, 1.0f); + m_3DParams.spread = std::clamp(m_3DParams.spread, 0.0f, 1.0f); + m_3DParams.reverbZoneMix = std::clamp(m_3DParams.reverbZoneMix, 0.0f, 1.0f); } void AudioSourceComponent::SetDopplerLevel(float level) { - m_3DParams.dopplerLevel = level; + m_3DParams.dopplerLevel = std::max(0.0f, level); } void AudioSourceComponent::SetSpread(float spread) { @@ -101,11 +335,24 @@ void AudioSourceComponent::SetTime(float seconds) { return; } - Audio::uint32 sampleRate = m_clip->GetSampleRate(); - Audio::uint32 channels = m_clip->GetChannels(); - Audio::uint64 sampleOffset = static_cast(seconds * sampleRate * channels); - m_samplePosition = sampleOffset; - m_lastingTime = seconds; + const Audio::uint32 sampleRate = m_clip->GetSampleRate(); + if (sampleRate == 0) { + m_samplePosition = 0; + m_playbackPosition = 0.0; + m_lastingTime = 0.0; + return; + } + + const double frameOffset = static_cast(std::max(0.0f, seconds)) * sampleRate; + const Audio::uint64 totalFrames = m_clip->GetFrameCount(); + if (totalFrames > 0) { + m_playbackPosition = std::min(frameOffset, static_cast(totalFrames)); + m_samplePosition = static_cast(m_playbackPosition); + } else { + m_samplePosition = 0; + m_playbackPosition = 0.0; + } + m_lastingTime = m_playbackPosition / static_cast(sampleRate); } float AudioSourceComponent::GetTime() const { @@ -129,26 +376,26 @@ void AudioSourceComponent::StopEnergyDetect() { } void AudioSourceComponent::Update(float deltaTime) { + if (m_gameObject) { + const Math::Vector3 position = transform().GetPosition(); + if (m_hasLastPosition && deltaTime > 0.0f) { + m_velocity = (position - m_lastPosition) / deltaTime; + } else { + m_velocity = Math::Vector3::Zero(); + } + m_lastPosition = position; + m_hasLastPosition = true; + } else { + m_velocity = Math::Vector3::Zero(); + m_hasLastPosition = false; + } + if (m_playState != Audio::PlayState::Playing || !m_clip) { return; } - m_lastingTime += deltaTime * m_pitch; - - Audio::uint32 channels = m_clip->GetChannels(); - Audio::uint32 sampleRate = m_clip->GetSampleRate(); - Audio::uint64 samplesPerSecond = sampleRate * channels; - Audio::uint64 samplesToAdvance = static_cast(deltaTime * m_pitch * samplesPerSecond); - m_samplePosition += samplesToAdvance; - - Audio::uint64 totalSamples = static_cast(m_clip->GetAudioData().Size()) / (m_clip->GetBitsPerSample() / 8); - - if (m_samplePosition >= totalSamples) { - if (m_isLooping) { - m_samplePosition = m_samplePosition % totalSamples; - } else { - Stop(); - } + if (m_clip->GetSampleRate() > 0) { + m_lastingTime = m_playbackPosition / static_cast(m_clip->GetSampleRate()); } } @@ -168,138 +415,431 @@ void AudioSourceComponent::OnDestroy() { Stop(); } -void AudioSourceComponent::DecodeAudioData() { - if (!m_clip || !m_clip->IsValid()) { - return; +void AudioSourceComponent::Serialize(std::ostream& os) const { + Resources::AssetRef serializedClipRef = m_clipRef; + std::string serializedClipPath = m_clipPath; + if (serializedClipPath.empty() && m_clip != nullptr) { + serializedClipPath = ToStdString(m_clip->GetPath()); } - if (m_isDecoded) { - return; + if (!serializedClipRef.IsValid() && + !serializedClipPath.empty() && + !HasVirtualPathScheme(serializedClipPath) && + Resources::ResourceManager::Get().TryGetAssetRef( + serializedClipPath.c_str(), + Resources::ResourceType::AudioClip, + serializedClipRef)) { } - const auto& audioData = m_clip->GetAudioData(); - if (audioData.Empty()) { - return; + if (serializedClipRef.IsValid() || !HasVirtualPathScheme(serializedClipPath)) { + serializedClipPath.clear(); } - Audio::uint32 channels = m_clip->GetChannels(); - Audio::uint32 bitsPerSample = m_clip->GetBitsPerSample(); - uint32_t bytesPerSample = bitsPerSample / 8; - uint32_t totalSamples = static_cast(audioData.Size()) / bytesPerSample; - - m_decodedData.resize(totalSamples); - - const uint8_t* rawData = audioData.Data(); - - if (bitsPerSample == 16) { - const int16_t* samples16 = reinterpret_cast(rawData); - for (uint32_t i = 0; i < totalSamples; ++i) { - m_decodedData[i] = samples16[i] / 32768.0f; - } - } else if (bitsPerSample == 8) { - for (uint32_t i = 0; i < totalSamples; ++i) { - m_decodedData[i] = (rawData[i] - 128) / 128.0f; - } - } else if (bitsPerSample == 24) { - for (uint32_t i = 0; i < totalSamples; ++i) { - int32_t sample = (rawData[i * 3] | (rawData[i * 3 + 1] << 8) | (rawData[i * 3 + 2] << 16)); - if (sample & 0x800000) { - sample |= 0xFF000000; - } - m_decodedData[i] = sample / 8388608.0f; - } - } else if (bitsPerSample == 32) { - const int32_t* samples32 = reinterpret_cast(rawData); - for (uint32_t i = 0; i < totalSamples; ++i) { - m_decodedData[i] = samples32[i] / 2147483648.0f; - } - } - - m_isDecoded = true; + os << "clipPath=" << serializedClipPath << ";"; + os << "clipRef=" << EncodeAssetRef(serializedClipRef) << ";"; + os << "volume=" << m_volume << ";"; + os << "pitch=" << m_pitch << ";"; + os << "pan=" << m_pan << ";"; + os << "looping=" << (m_isLooping ? 1 : 0) << ";"; + os << "spatialize=" << (m_spatialize ? 1 : 0) << ";"; + os << "hrtfEnabled=" << (m_useHRTF ? 1 : 0) << ";"; + os << "hrtfCrossFeed=" << m_hrtf.GetCrossFeed() << ";"; + os << "hrtfQuality=" << m_hrtf.GetQualityLevel() << ";"; + os << "dopplerLevel=" << m_3DParams.dopplerLevel << ";"; + os << "speedOfSound=" << m_3DParams.speedOfSound << ";"; + os << "minDistance=" << m_3DParams.minDistance << ";"; + os << "maxDistance=" << m_3DParams.maxDistance << ";"; + os << "panLevel=" << m_3DParams.panLevel << ";"; + os << "spread=" << m_3DParams.spread << ";"; + os << "reverbZoneMix=" << m_3DParams.reverbZoneMix << ";"; } -void AudioSourceComponent::ProcessAudio(float* buffer, Audio::uint32 sampleCount, Audio::uint32 channels, - const Math::Vector3& listenerPosition, - const Math::Quaternion& listenerRotation) { - if (m_playState != Audio::PlayState::Playing || !m_clip || !m_isDecoded) { - return; - } +void AudioSourceComponent::Deserialize(std::istream& is) { + ClearClip(); + SetVolume(1.0f); + SetPitch(1.0f); + SetPan(0.0f); + SetLooping(false); + SetSpatialize(true); + SetHRTFEnabled(false); + SetHRTFCrossFeed(0.0f); + SetHRTFQuality(2); + Set3DParams(Audio::Audio3DParams()); - if (channels == 0 || sampleCount == 0) { - return; - } + std::string token; + std::string pendingClipPath; + Resources::AssetRef pendingClipRef; + Audio::Audio3DParams params = m_3DParams; + while (std::getline(is, token, ';')) { + if (token.empty()) { + continue; + } - if (m_decodedData.empty()) { - return; - } + const size_t eqPos = token.find('='); + if (eqPos == std::string::npos) { + continue; + } - float volume = m_volume; - if (m_spatialize) { - Apply3DAttenuation(listenerPosition); - volume *= m_volume; - } + const std::string key = token.substr(0, eqPos); + const std::string value = token.substr(eqPos + 1); - Audio::uint32 clipChannels = m_clip->GetChannels(); - Audio::uint64 totalSamples = static_cast(m_decodedData.size()); - Audio::uint64 samplesPerFrame = sampleCount * channels; - - for (Audio::uint32 i = 0; i < sampleCount; ++i) { - for (Audio::uint32 ch = 0; ch < channels; ++ch) { - Audio::uint64 outputIndex = m_samplePosition + i * channels + ch; - - if (outputIndex >= totalSamples) { - if (m_isLooping && totalSamples > 0) { - outputIndex = outputIndex % totalSamples; - } else { - buffer[i * channels + ch] += 0.0f; - continue; - } - } - - Audio::uint64 decodedChannel = (ch < clipChannels) ? ch : (clipChannels - 1); - Audio::uint64 decodedIndex = (outputIndex / channels) * clipChannels + decodedChannel; - float sample = m_decodedData[decodedIndex]; - - buffer[i * channels + ch] += sample * volume; + if (key == "clipPath") { + pendingClipPath = value; + } else if (key == "clipRef") { + TryDecodeAssetRef(value, pendingClipRef); + } else if (key == "volume") { + SetVolume(std::stof(value)); + } else if (key == "pitch") { + SetPitch(std::stof(value)); + } else if (key == "pan") { + SetPan(std::stof(value)); + } else if (key == "looping") { + SetLooping(std::stoi(value) != 0); + } else if (key == "spatialize") { + SetSpatialize(std::stoi(value) != 0); + } else if (key == "hrtfEnabled") { + SetHRTFEnabled(std::stoi(value) != 0); + } else if (key == "hrtfCrossFeed") { + SetHRTFCrossFeed(std::stof(value)); + } else if (key == "hrtfQuality") { + SetHRTFQuality(static_cast(std::stoul(value))); + } else if (key == "dopplerLevel") { + params.dopplerLevel = std::stof(value); + } else if (key == "speedOfSound") { + params.speedOfSound = std::stof(value); + } else if (key == "minDistance") { + params.minDistance = std::stof(value); + } else if (key == "maxDistance") { + params.maxDistance = std::stof(value); + } else if (key == "panLevel") { + params.panLevel = std::stof(value); + } else if (key == "spread") { + params.spread = std::stof(value); + } else if (key == "reverbZoneMix") { + params.reverbZoneMix = std::stof(value); } } - m_samplePosition += samplesPerFrame; + Set3DParams(params); - if (m_samplePosition >= totalSamples) { + if (pendingClipRef.IsValid()) { + m_clipRef = pendingClipRef; + m_clipHandle = Resources::ResourceManager::Get().Load(pendingClipRef); + m_clip = m_clipHandle.Get(); + if (m_clip != nullptr) { + m_clipPath = ToStdString(m_clip->GetPath()); + if (m_clip->IsValid()) { + static_cast(m_clip->GetDecodedPCMData()); + } + } else { + Containers::String resolvedPath; + if (Resources::ResourceManager::Get().TryResolveAssetPath(pendingClipRef, resolvedPath)) { + SetClipPath(ToStdString(resolvedPath)); + m_clipRef = pendingClipRef; + } + } + } + + if (m_clip == nullptr && !pendingClipPath.empty() && HasVirtualPathScheme(pendingClipPath)) { + SetClipPath(pendingClipPath); + } +} + +void AudioSourceComponent::ProcessAudio(float* buffer, Audio::uint32 frameCount, Audio::uint32 channels, + const Math::Vector3& listenerPosition, + const Math::Quaternion& listenerRotation, + const Math::Vector3& listenerVelocity, + float listenerDopplerLevel, + float speedOfSound, + Audio::uint32 outputSampleRate) { + if (m_playState != Audio::PlayState::Playing || !m_clip) { + return; + } + + if (channels == 0 || frameCount == 0) { + return; + } + + const auto& decodedData = m_clip->GetDecodedPCMData(); + if (decodedData.empty()) { + return; + } + + const float attenuation = m_spatialize ? Compute3DAttenuation(listenerPosition) : 1.0f; + const float volume = m_volume * attenuation; + if (volume <= 0.0f) { + return; + } + + const Audio::uint32 clipChannels = m_clip->GetChannels(); + if (clipChannels == 0) { + return; + } + + const Audio::uint64 totalFrames = static_cast(decodedData.size()) / clipChannels; + if (totalFrames == 0) { + return; + } + + const Audio::uint32 clipSampleRate = m_clip->GetSampleRate(); + if (clipSampleRate == 0) { + return; + } + + const Audio::uint32 mixSampleRate = outputSampleRate > 0 ? outputSampleRate : clipSampleRate; + if (mixSampleRate == 0) { + return; + } + + double playbackRate = + (static_cast(clipSampleRate) / static_cast(mixSampleRate)) * + static_cast(m_pitch); + playbackRate *= ComputeDopplerFactor( + listenerPosition, + listenerVelocity, + listenerDopplerLevel, + speedOfSound); + + playbackRate = std::max(playbackRate, 0.0); + + const bool useHRTF = + m_useHRTF && m_spatialize && m_gameObject && clipChannels == 1 && channels >= 2; + float spatialPan = 0.0f; + if (!useHRTF && m_spatialize) { + spatialPan = ComputeSpatialPan(listenerPosition, listenerRotation); + } + const float combinedPan = std::clamp(m_pan + spatialPan, -1.0f, 1.0f); + float leftPanGain = 1.0f; + float rightPanGain = 1.0f; + ComputePanGains(combinedPan, leftPanGain, rightPanGain); + + double playbackPosition = m_playbackPosition; + bool reachedClipEnd = false; + Audio::uint32 renderedFrameCount = 0; + + for (Audio::uint32 i = 0; i < frameCount; ++i) { + if (!m_isLooping && playbackPosition >= static_cast(totalFrames)) { + reachedClipEnd = true; + break; + } + + const double sourceFrame = m_isLooping + ? WrapFramePosition(playbackPosition, totalFrames) + : playbackPosition; + + if (channels >= 2) { + if (clipChannels == 1) { + const float sample = SampleDecodedChannel( + decodedData, + totalFrames, + clipChannels, + sourceFrame, + 0, + m_isLooping); + if (useHRTF) { + buffer[i * channels] += sample * volume; + buffer[i * channels + 1] += sample * volume; + } else { + buffer[i * channels] += sample * volume * leftPanGain; + buffer[i * channels + 1] += sample * volume * rightPanGain; + } + + for (Audio::uint32 ch = 2; ch < channels; ++ch) { + buffer[i * channels + ch] += sample * volume; + } + } else { + float leftSample = SampleDecodedChannel( + decodedData, + totalFrames, + clipChannels, + sourceFrame, + 0, + m_isLooping); + float rightSample = SampleDecodedChannel( + decodedData, + totalFrames, + clipChannels, + sourceFrame, + 1, + m_isLooping); + + ApplyStereoSpread( + leftSample, + rightSample, + m_spatialize ? m_3DParams.spread : 1.0f, + leftSample, + rightSample); + + float stereoLeftGain = leftPanGain; + float stereoRightGain = rightPanGain; + if (m_spatialize) { + const float stereoPan = combinedPan * (1.0f - 0.5f * m_3DParams.spread); + ComputePanGains(stereoPan, stereoLeftGain, stereoRightGain); + } + + buffer[i * channels] += leftSample * volume * stereoLeftGain; + buffer[i * channels + 1] += rightSample * volume * stereoRightGain; + + for (Audio::uint32 ch = 2; ch < channels; ++ch) { + const float sample = SampleDecodedChannel( + decodedData, + totalFrames, + clipChannels, + sourceFrame, + std::min(ch, clipChannels - 1), + m_isLooping); + buffer[i * channels + ch] += sample * volume; + } + } + } else { + for (Audio::uint32 ch = 0; ch < channels; ++ch) { + const float sample = SampleDecodedChannel( + decodedData, + totalFrames, + clipChannels, + sourceFrame, + std::min(ch, clipChannels - 1), + m_isLooping); + buffer[i * channels + ch] += sample * volume; + } + } + + renderedFrameCount++; + + playbackPosition += playbackRate; + if (m_isLooping && playbackPosition >= static_cast(totalFrames)) { + playbackPosition = WrapFramePosition(playbackPosition, totalFrames); + } else if (!m_isLooping && playbackPosition >= static_cast(totalFrames)) { + reachedClipEnd = true; + break; + } + } + + if (useHRTF && renderedFrameCount > 0) { + m_hrtf.SetSampleRate(mixSampleRate); + m_hrtf.SetSpeedOfSound(speedOfSound); + m_hrtf.ProcessAudio( + buffer, + renderedFrameCount, + channels, + transform().GetPosition(), + listenerPosition, + listenerRotation); + + if (std::abs(m_pan) > Math::EPSILON) { + ApplyPanToBuffer(buffer, renderedFrameCount, channels, m_pan); + } + } + + m_playbackPosition = playbackPosition; + m_samplePosition = static_cast(m_playbackPosition); + m_lastingTime = m_playbackPosition / static_cast(clipSampleRate); + + if (reachedClipEnd) { if (m_isLooping) { - m_samplePosition = m_samplePosition % totalSamples; + m_playbackPosition = WrapFramePosition(m_playbackPosition, totalFrames); + m_samplePosition = static_cast(m_playbackPosition); + m_lastingTime = m_playbackPosition / static_cast(clipSampleRate); } else { Stop(); } } if (m_isEnergyDetecting) { - UpdateEnergy(buffer, sampleCount * channels); + UpdateEnergy(buffer, renderedFrameCount * channels); } } -void AudioSourceComponent::Apply3DAttenuation(const Math::Vector3& listenerPosition) { +float AudioSourceComponent::Compute3DAttenuation(const Math::Vector3& listenerPosition) const { if (!m_gameObject) { - return; + return 1.0f; } - Math::Vector3 sourcePosition = transform().GetPosition(); - Math::Vector3 direction = sourcePosition - listenerPosition; - float distance = direction.Magnitude(); + const Math::Vector3 sourcePosition = transform().GetPosition(); + const Math::Vector3 direction = sourcePosition - listenerPosition; + const float distance = direction.Magnitude(); + const float minDistance = std::max(0.0f, m_3DParams.minDistance); + const float maxDistance = std::max(minDistance, m_3DParams.maxDistance); - if (distance > m_3DParams.maxDistance) { - m_volume = 0.0f; - return; + if (distance <= minDistance) { + return 1.0f; } - float normalizedDistance = distance / m_3DParams.maxDistance; + if (distance >= maxDistance) { + return 0.0f; + } + + float normalizedDistance = 0.0f; + if (maxDistance > minDistance) { + normalizedDistance = (distance - minDistance) / (maxDistance - minDistance); + } normalizedDistance = std::max(0.0f, std::min(1.0f, normalizedDistance)); float attenuation = 1.0f - normalizedDistance; attenuation = std::pow(attenuation, 2.0f); + return attenuation; +} - m_volume *= attenuation; +float AudioSourceComponent::ComputeSpatialPan(const Math::Vector3& listenerPosition, + const Math::Quaternion& listenerRotation) const { + if (!m_gameObject) { + return 0.0f; + } + + const Math::Vector3 sourcePosition = transform().GetPosition(); + const Math::Vector3 toSource = sourcePosition - listenerPosition; + const float distance = toSource.Magnitude(); + if (distance <= Math::EPSILON) { + return 0.0f; + } + + const Math::Vector3 listenerSpaceDirection = + listenerRotation.Inverse() * (toSource / distance); + return std::clamp(listenerSpaceDirection.x, -1.0f, 1.0f) * + std::clamp(m_3DParams.panLevel, 0.0f, 1.0f); +} + +double AudioSourceComponent::ComputeDopplerFactor(const Math::Vector3& listenerPosition, + const Math::Vector3& listenerVelocity, + float listenerDopplerLevel, + float speedOfSound) const { + if (!m_spatialize || !m_gameObject) { + return 1.0; + } + + const Math::Vector3 sourcePosition = transform().GetPosition(); + const Math::Vector3 toSource = sourcePosition - listenerPosition; + const float distance = toSource.Magnitude(); + if (distance <= Math::EPSILON) { + return 1.0; + } + + const float dopplerScale = + std::max(0.0f, listenerDopplerLevel) * std::max(0.0f, m_3DParams.dopplerLevel); + if (dopplerScale <= 0.0f) { + return 1.0; + } + + const Math::Vector3 direction = toSource / distance; + const float effectiveSpeedOfSound = std::max(1.0f, speedOfSound); + const float velocityClamp = effectiveSpeedOfSound * 0.95f; + const float listenerTowardSource = Math::Vector3::Dot(listenerVelocity, direction); + const float sourceTowardListener = -Math::Vector3::Dot(m_velocity, direction); + + const float scaledListenerVelocity = std::clamp( + listenerTowardSource * dopplerScale, + -velocityClamp, + velocityClamp); + const float scaledSourceVelocity = std::clamp( + sourceTowardListener * dopplerScale, + -velocityClamp, + velocityClamp); + + const double numerator = static_cast(effectiveSpeedOfSound + scaledListenerVelocity); + const double denominator = std::max( + 1.0, + static_cast(effectiveSpeedOfSound - scaledSourceVelocity)); + return std::clamp(numerator / denominator, 0.25, 4.0); } void AudioSourceComponent::UpdateEnergy(const float* buffer, Audio::uint32 sampleCount) { diff --git a/engine/src/Resources/AudioClip/AudioClip.cpp b/engine/src/Resources/AudioClip/AudioClip.cpp index b920e784..fce38c2e 100644 --- a/engine/src/Resources/AudioClip/AudioClip.cpp +++ b/engine/src/Resources/AudioClip/AudioClip.cpp @@ -8,19 +8,151 @@ AudioClip::AudioClip() = default; AudioClip::~AudioClip() = default; void AudioClip::Release() { - m_audioData.Clear(); + m_pcmData.Clear(); + m_decodedPCMData.clear(); + m_decodedPCMValid = false; m_rhiResource = nullptr; + m_sampleRate = 44100; + m_channels = 2; + m_bitsPerSample = 16; + m_duration = 0.0f; + m_format = AudioFormat::Unknown; + m_audioType = AudioType::SoundEffect; + m_is3D = false; + m_loop = false; m_isValid = false; + m_memorySize = 0; } -void AudioClip::SetAudioData(const Containers::Array& data) { - m_audioData = data; - - if (m_sampleRate > 0 && m_channels > 0 && m_bitsPerSample > 0) { - size_t bytesPerSample = m_bitsPerSample / 8; - size_t totalSamples = data.Size() / bytesPerSample; - m_duration = static_cast(totalSamples) / static_cast(m_sampleRate); +void AudioClip::SetPCMData(const Containers::Array& data) { + m_pcmData = data; + InvalidateDecodedPCMData(); + RefreshDerivedData(); +} + +void AudioClip::SetSampleRate(Core::uint32 rate) { + m_sampleRate = rate; + InvalidateDecodedPCMData(); + RefreshDerivedData(); +} + +void AudioClip::SetChannels(Core::uint32 channels) { + m_channels = channels; + InvalidateDecodedPCMData(); + RefreshDerivedData(); +} + +void AudioClip::SetBitsPerSample(Core::uint32 bits) { + m_bitsPerSample = bits; + InvalidateDecodedPCMData(); + RefreshDerivedData(); +} + +const std::vector& AudioClip::GetDecodedPCMData() const { + if (!m_decodedPCMValid) { + BuildDecodedPCMData(); } + return m_decodedPCMData; +} + +void AudioClip::RefreshDerivedData() { + if (m_sampleRate > 0 && m_channels > 0 && m_bitsPerSample > 0) { + const size_t bytesPerFrame = + static_cast(m_channels) * (static_cast(m_bitsPerSample) / 8u); + if (bytesPerFrame > 0) { + const size_t totalFrames = m_pcmData.Size() / bytesPerFrame; + m_duration = static_cast(totalFrames) / static_cast(m_sampleRate); + RefreshMemorySize(); + return; + } + } + + m_duration = 0.0f; + RefreshMemorySize(); +} + +void AudioClip::RefreshMemorySize() { + m_memorySize = sizeof(AudioClip) + + m_name.Length() + + m_path.Length() + + m_pcmData.Size() + + m_decodedPCMData.size() * sizeof(float); +} + +void AudioClip::InvalidateDecodedPCMData() { + m_decodedPCMData.clear(); + m_decodedPCMValid = false; +} + +void AudioClip::BuildDecodedPCMData() const { + m_decodedPCMData.clear(); + + if (m_pcmData.Empty() || m_bitsPerSample == 0) { + m_decodedPCMValid = true; + const_cast(this)->RefreshMemorySize(); + return; + } + + const Core::uint32 bytesPerSample = m_bitsPerSample / 8u; + if (bytesPerSample == 0) { + m_decodedPCMValid = true; + const_cast(this)->RefreshMemorySize(); + return; + } + + const size_t totalSamples = m_pcmData.Size() / bytesPerSample; + m_decodedPCMData.resize(totalSamples, 0.0f); + + const Core::uint8* rawData = m_pcmData.Data(); + if (m_bitsPerSample == 8) { + for (size_t i = 0; i < totalSamples; ++i) { + m_decodedPCMData[i] = (static_cast(rawData[i]) - 128) / 128.0f; + } + } else if (m_bitsPerSample == 16) { + const int16_t* samples16 = reinterpret_cast(rawData); + for (size_t i = 0; i < totalSamples; ++i) { + m_decodedPCMData[i] = samples16[i] / 32768.0f; + } + } else if (m_bitsPerSample == 24) { + for (size_t i = 0; i < totalSamples; ++i) { + int32_t sample = + (static_cast(rawData[i * 3]) | + (static_cast(rawData[i * 3 + 1]) << 8) | + (static_cast(rawData[i * 3 + 2]) << 16)); + if ((sample & 0x00800000) != 0) { + sample |= static_cast(0xFF000000); + } + m_decodedPCMData[i] = sample / 8388608.0f; + } + } else if (m_bitsPerSample == 32) { + const int32_t* samples32 = reinterpret_cast(rawData); + for (size_t i = 0; i < totalSamples; ++i) { + m_decodedPCMData[i] = samples32[i] / 2147483648.0f; + } + } else { + m_decodedPCMData.clear(); + } + + m_decodedPCMValid = true; + const_cast(this)->RefreshMemorySize(); +} + +Core::uint64 AudioClip::GetFrameCount() const { + if (m_channels == 0 || m_bitsPerSample == 0) { + return 0; + } + + const Core::uint64 bytesPerFrame = + static_cast(m_channels) * (static_cast(m_bitsPerSample) / 8u); + if (bytesPerFrame == 0) { + return 0; + } + + return static_cast(m_pcmData.Size()) / bytesPerFrame; +} + +Core::uint64 AudioClip::GetSampleCount() const { + return GetFrameCount() * static_cast(m_channels); } void AudioClip::SetRHIResource(class IRHIAudioBuffer* resource) { diff --git a/tests/Components/test_audio_source_component.cpp b/tests/Components/test_audio_source_component.cpp new file mode 100644 index 00000000..6d5e3f46 --- /dev/null +++ b/tests/Components/test_audio_source_component.cpp @@ -0,0 +1,511 @@ +#include + +#include +#include +#include +#include +#include + +using namespace XCEngine::Components; +using namespace XCEngine::Resources; + +namespace { + +AudioClip CreateMono16Clip(std::initializer_list samples, XCEngine::Core::uint32 sampleRate = 4) { + AudioClip clip; + XCEngine::Containers::Array pcmData; + pcmData.ResizeUninitialized(samples.size() * sizeof(int16_t)); + + size_t byteOffset = 0; + for (const int16_t sample : samples) { + const uint16_t encoded = static_cast(sample); + pcmData[byteOffset++] = static_cast(encoded & 0xFFu); + pcmData[byteOffset++] = static_cast((encoded >> 8) & 0xFFu); + } + + clip.SetSampleRate(sampleRate); + clip.SetChannels(1); + clip.SetBitsPerSample(16); + clip.SetAudioFormat(AudioFormat::WAV); + clip.SetPCMData(pcmData); + clip.m_isValid = true; + return clip; +} + +AudioClip CreateStereo16Clip(std::initializer_list samples, XCEngine::Core::uint32 sampleRate = 4) { + AudioClip clip; + XCEngine::Containers::Array pcmData; + pcmData.ResizeUninitialized(samples.size() * sizeof(int16_t)); + + size_t byteOffset = 0; + for (const int16_t sample : samples) { + const uint16_t encoded = static_cast(sample); + pcmData[byteOffset++] = static_cast(encoded & 0xFFu); + pcmData[byteOffset++] = static_cast((encoded >> 8) & 0xFFu); + } + + clip.SetSampleRate(sampleRate); + clip.SetChannels(2); + clip.SetBitsPerSample(16); + clip.SetAudioFormat(AudioFormat::WAV); + clip.SetPCMData(pcmData); + clip.m_isValid = true; + return clip; +} + +TEST(AudioSourceComponent, MonoClipMapsToStereoOutput) { + AudioClip clip = CreateMono16Clip({32767, -32768}, 4); + + AudioSourceComponent source; + source.SetSpatialize(false); + source.SetClip(&clip); + source.Play(); + + float buffer[4] = {}; + source.ProcessAudio( + buffer, + 2, + 2, + XCEngine::Math::Vector3::Zero(), + XCEngine::Math::Quaternion::Identity()); + + EXPECT_NEAR(buffer[0], 32767.0f / 32768.0f, 1e-5f); + EXPECT_NEAR(buffer[1], 32767.0f / 32768.0f, 1e-5f); + EXPECT_NEAR(buffer[2], -1.0f, 1e-5f); + EXPECT_NEAR(buffer[3], -1.0f, 1e-5f); + EXPECT_FALSE(source.IsPlaying()); + EXPECT_FLOAT_EQ(source.GetTime(), 0.0f); +} + +TEST(AudioSourceComponent, PauseSilencesUntilResumed) { + AudioClip clip = CreateMono16Clip({32767, 16384}, 4); + + AudioSourceComponent source; + source.SetSpatialize(false); + source.SetClip(&clip); + source.Play(); + source.Pause(); + + float pausedBuffer[1] = {}; + source.ProcessAudio( + pausedBuffer, + 1, + 1, + XCEngine::Math::Vector3::Zero(), + XCEngine::Math::Quaternion::Identity()); + EXPECT_FLOAT_EQ(pausedBuffer[0], 0.0f); + + source.Play(); + + float resumedBuffer[1] = {}; + source.ProcessAudio( + resumedBuffer, + 1, + 1, + XCEngine::Math::Vector3::Zero(), + XCEngine::Math::Quaternion::Identity()); + EXPECT_NEAR(resumedBuffer[0], 32767.0f / 32768.0f, 1e-5f); +} + +TEST(AudioSourceComponent, LoopingWrapsAtClipEnd) { + AudioClip clip = CreateMono16Clip({32767, -32768}, 4); + + AudioSourceComponent source; + source.SetSpatialize(false); + source.SetLooping(true); + source.SetClip(&clip); + source.Play(); + + float buffer[4] = {}; + source.ProcessAudio( + buffer, + 4, + 1, + XCEngine::Math::Vector3::Zero(), + XCEngine::Math::Quaternion::Identity()); + + EXPECT_NEAR(buffer[0], 32767.0f / 32768.0f, 1e-5f); + EXPECT_NEAR(buffer[1], -1.0f, 1e-5f); + EXPECT_NEAR(buffer[2], 32767.0f / 32768.0f, 1e-5f); + EXPECT_NEAR(buffer[3], -1.0f, 1e-5f); + EXPECT_TRUE(source.IsPlaying()); +} + +TEST(AudioSourceComponent, PitchSupportsFractionalFramePlayback) { + AudioClip clip = CreateMono16Clip({0, 32767, 0}, 4); + + AudioSourceComponent source; + source.SetSpatialize(false); + source.SetPitch(0.5f); + source.SetClip(&clip); + source.Play(); + + float buffer[3] = {}; + source.ProcessAudio( + buffer, + 3, + 1, + XCEngine::Math::Vector3::Zero(), + XCEngine::Math::Quaternion::Identity()); + + const float peak = 32767.0f / 32768.0f; + EXPECT_NEAR(buffer[0], 0.0f, 1e-5f); + EXPECT_NEAR(buffer[1], peak * 0.5f, 1e-4f); + EXPECT_NEAR(buffer[2], peak, 1e-5f); + EXPECT_TRUE(source.IsPlaying()); + EXPECT_NEAR(source.GetTime(), 0.375f, 1e-5f); +} + +TEST(AudioSourceComponent, PanControlsStereoBalanceForMonoClip) { + AudioClip clip = CreateMono16Clip({32767}, 4); + + AudioSourceComponent leftSource; + leftSource.SetSpatialize(false); + leftSource.SetPan(-1.0f); + leftSource.SetClip(&clip); + leftSource.Play(); + + float leftBuffer[2] = {}; + leftSource.ProcessAudio( + leftBuffer, + 1, + 2, + XCEngine::Math::Vector3::Zero(), + XCEngine::Math::Quaternion::Identity()); + + EXPECT_NEAR(leftBuffer[0], 32767.0f / 32768.0f, 1e-5f); + EXPECT_NEAR(leftBuffer[1], 0.0f, 1e-5f); + + AudioSourceComponent rightSource; + rightSource.SetSpatialize(false); + rightSource.SetPan(1.0f); + rightSource.SetClip(&clip); + rightSource.Play(); + + float rightBuffer[2] = {}; + rightSource.ProcessAudio( + rightBuffer, + 1, + 2, + XCEngine::Math::Vector3::Zero(), + XCEngine::Math::Quaternion::Identity()); + + EXPECT_NEAR(rightBuffer[0], 0.0f, 1e-5f); + EXPECT_NEAR(rightBuffer[1], 32767.0f / 32768.0f, 1e-5f); +} + +TEST(AudioSourceComponent, DopplerAdjustsPlaybackRateFromListenerVelocity) { + AudioClip clip = CreateMono16Clip({0, 32767, 0, 0, 0}, 4); + + GameObject sourceObject("AudioSourceObject"); + sourceObject.GetTransform()->SetPosition(XCEngine::Math::Vector3(10.0f, 0.0f, 0.0f)); + + auto* source = sourceObject.AddComponent(); + XCEngine::Audio::Audio3DParams params; + params.minDistance = 100.0f; + params.maxDistance = 100000.0f; + source->Set3DParams(params); + source->SetClip(&clip); + source->Play(); + + float buffer[2] = {}; + source->ProcessAudio( + buffer, + 2, + 1, + XCEngine::Math::Vector3::Zero(), + XCEngine::Math::Quaternion::Identity(), + XCEngine::Math::Vector3(10.0f, 0.0f, 0.0f), + 1.0f, + 20.0f, + 4); + + const float peak = 32767.0f / 32768.0f; + EXPECT_NEAR(buffer[0], 0.0f, 1e-5f); + EXPECT_NEAR(buffer[1], peak * 0.5f, 1e-3f); + EXPECT_TRUE(source->IsPlaying()); +} + +TEST(AudioSourceComponent, SourceVelocityAdjustsDopplerWhenMovingAway) { + AudioClip clip = CreateMono16Clip({0, 32767, 0}, 4); + + GameObject sourceObject("MovingAudioSource"); + sourceObject.GetTransform()->SetPosition(XCEngine::Math::Vector3(10.0f, 0.0f, 0.0f)); + + auto* source = sourceObject.AddComponent(); + XCEngine::Audio::Audio3DParams params; + params.minDistance = 100.0f; + params.maxDistance = 100000.0f; + source->Set3DParams(params); + source->SetClip(&clip); + source->Play(); + + source->Update(0.0f); + sourceObject.GetTransform()->SetPosition(XCEngine::Math::Vector3(20.0f, 0.0f, 0.0f)); + source->Update(1.0f); + + float buffer[2] = {}; + source->ProcessAudio( + buffer, + 2, + 1, + XCEngine::Math::Vector3::Zero(), + XCEngine::Math::Quaternion::Identity(), + XCEngine::Math::Vector3::Zero(), + 1.0f, + 20.0f, + 4); + + const float peak = 32767.0f / 32768.0f; + EXPECT_NEAR(buffer[0], 0.0f, 1e-5f); + EXPECT_NEAR(buffer[1], peak * (20.0f / 30.0f), 1e-3f); +} + +TEST(AudioSourceComponent, SpatialPanUsesListenerRotationAndPanLevel) { + AudioClip clip = CreateMono16Clip({32767}, 4); + + GameObject rightSourceObject("RightSource"); + rightSourceObject.GetTransform()->SetPosition(XCEngine::Math::Vector3(10.0f, 0.0f, 0.0f)); + + auto* rightSource = rightSourceObject.AddComponent(); + XCEngine::Audio::Audio3DParams params; + params.minDistance = 0.0f; + params.maxDistance = 100000.0f; + params.panLevel = 1.0f; + rightSource->Set3DParams(params); + rightSource->SetClip(&clip); + rightSource->Play(); + + float identityBuffer[2] = {}; + rightSource->ProcessAudio( + identityBuffer, + 1, + 2, + XCEngine::Math::Vector3::Zero(), + XCEngine::Math::Quaternion::Identity()); + + EXPECT_GT(identityBuffer[1], identityBuffer[0]); + + GameObject rotatedSourceObject("RotatedRightSource"); + rotatedSourceObject.GetTransform()->SetPosition(XCEngine::Math::Vector3(10.0f, 0.0f, 0.0f)); + + auto* rotatedSource = rotatedSourceObject.AddComponent(); + rotatedSource->Set3DParams(params); + rotatedSource->SetClip(&clip); + rotatedSource->Play(); + + float rotatedBuffer[2] = {}; + rotatedSource->ProcessAudio( + rotatedBuffer, + 1, + 2, + XCEngine::Math::Vector3::Zero(), + XCEngine::Math::Quaternion::FromEulerAngles(0.0f, XCEngine::Math::PI, 0.0f)); + + EXPECT_GT(rotatedBuffer[0], rotatedBuffer[1]); + + GameObject neutralSourceObject("NeutralPanSource"); + neutralSourceObject.GetTransform()->SetPosition(XCEngine::Math::Vector3(10.0f, 0.0f, 0.0f)); + + auto* neutralSource = neutralSourceObject.AddComponent(); + params.panLevel = 0.0f; + neutralSource->Set3DParams(params); + neutralSource->SetClip(&clip); + neutralSource->Play(); + + float neutralBuffer[2] = {}; + neutralSource->ProcessAudio( + neutralBuffer, + 1, + 2, + XCEngine::Math::Vector3::Zero(), + XCEngine::Math::Quaternion::Identity()); + + EXPECT_NEAR(neutralBuffer[0], neutralBuffer[1], 1e-5f); +} + +TEST(AudioSourceComponent, SpreadControlsStereoWidthWhenSpatialized) { + AudioClip clip = CreateStereo16Clip({32767, 0}, 4); + + GameObject narrowSourceObject("NarrowStereoSource"); + narrowSourceObject.GetTransform()->SetPosition(XCEngine::Math::Vector3::Zero()); + + auto* narrowSource = narrowSourceObject.AddComponent(); + XCEngine::Audio::Audio3DParams narrowParams; + narrowParams.minDistance = 0.0f; + narrowParams.maxDistance = 100000.0f; + narrowParams.spread = 0.0f; + narrowSource->Set3DParams(narrowParams); + narrowSource->SetClip(&clip); + narrowSource->Play(); + + float narrowBuffer[2] = {}; + narrowSource->ProcessAudio( + narrowBuffer, + 1, + 2, + XCEngine::Math::Vector3::Zero(), + XCEngine::Math::Quaternion::Identity()); + + const float peak = 32767.0f / 32768.0f; + EXPECT_NEAR(narrowBuffer[0], peak * 0.5f, 1e-5f); + EXPECT_NEAR(narrowBuffer[1], peak * 0.5f, 1e-5f); + + GameObject wideSourceObject("WideStereoSource"); + wideSourceObject.GetTransform()->SetPosition(XCEngine::Math::Vector3::Zero()); + + auto* wideSource = wideSourceObject.AddComponent(); + XCEngine::Audio::Audio3DParams wideParams = narrowParams; + wideParams.spread = 1.0f; + wideSource->Set3DParams(wideParams); + wideSource->SetClip(&clip); + wideSource->Play(); + + float wideBuffer[2] = {}; + wideSource->ProcessAudio( + wideBuffer, + 1, + 2, + XCEngine::Math::Vector3::Zero(), + XCEngine::Math::Quaternion::Identity()); + + EXPECT_NEAR(wideBuffer[0], peak, 1e-5f); + EXPECT_NEAR(wideBuffer[1], 0.0f, 1e-5f); +} + +TEST(AudioSourceComponent, HRTFSpatializesMonoSourceOnStereoOutput) { + AudioClip clip = CreateMono16Clip({32767}, 4); + + XCEngine::Audio::Audio3DParams params; + params.minDistance = 100.0f; + params.maxDistance = 100000.0f; + params.panLevel = 1.0f; + + GameObject baselineObject("BaselineSpatialSource"); + baselineObject.GetTransform()->SetPosition(XCEngine::Math::Vector3(10.0f, 0.0f, 0.0f)); + auto* baselineSource = baselineObject.AddComponent(); + baselineSource->Set3DParams(params); + baselineSource->SetClip(&clip); + baselineSource->Play(); + + float baselineBuffer[2] = {}; + baselineSource->ProcessAudio( + baselineBuffer, + 1, + 2, + XCEngine::Math::Vector3::Zero(), + XCEngine::Math::Quaternion::Identity()); + + EXPECT_NEAR(baselineBuffer[0], 0.0f, 1e-5f); + EXPECT_GT(baselineBuffer[1], 0.0f); + + GameObject hrtfObject("HRTFSpatialSource"); + hrtfObject.GetTransform()->SetPosition(XCEngine::Math::Vector3(10.0f, 0.0f, 0.0f)); + auto* hrtfSource = hrtfObject.AddComponent(); + hrtfSource->Set3DParams(params); + hrtfSource->SetHRTFEnabled(true); + hrtfSource->SetHRTFCrossFeed(0.25f); + hrtfSource->SetClip(&clip); + hrtfSource->Play(); + + float hrtfBuffer[2] = {}; + hrtfSource->ProcessAudio( + hrtfBuffer, + 1, + 2, + XCEngine::Math::Vector3::Zero(), + XCEngine::Math::Quaternion::Identity()); + + EXPECT_GT(hrtfBuffer[0], baselineBuffer[0]); + EXPECT_GT(hrtfBuffer[0], 0.0f); + EXPECT_GT(hrtfBuffer[1], hrtfBuffer[0]); +} + +TEST(AudioSourceComponent, MultipleSourcesCanReuseSameClipDecodedCache) { + AudioClip clip = CreateMono16Clip({32767, -32768}, 4); + const float* decodedBuffer = clip.GetDecodedPCMData().data(); + + AudioSourceComponent sourceA; + sourceA.SetSpatialize(false); + sourceA.SetClip(&clip); + sourceA.Play(); + + AudioSourceComponent sourceB; + sourceB.SetSpatialize(false); + sourceB.SetClip(&clip); + sourceB.Play(); + + float bufferA[2] = {}; + float bufferB[2] = {}; + sourceA.ProcessAudio( + bufferA, + 2, + 1, + XCEngine::Math::Vector3::Zero(), + XCEngine::Math::Quaternion::Identity()); + sourceB.ProcessAudio( + bufferB, + 2, + 1, + XCEngine::Math::Vector3::Zero(), + XCEngine::Math::Quaternion::Identity()); + + EXPECT_EQ(decodedBuffer, clip.GetDecodedPCMData().data()); + EXPECT_NEAR(bufferA[0], 32767.0f / 32768.0f, 1e-5f); + EXPECT_NEAR(bufferA[1], -1.0f, 1e-5f); + EXPECT_NEAR(bufferB[0], 32767.0f / 32768.0f, 1e-5f); + EXPECT_NEAR(bufferB[1], -1.0f, 1e-5f); +} + +TEST(AudioSourceComponent, SerializeRoundTripPreservesClipPathAndSpatialSettings) { + AudioClip clip = CreateMono16Clip({32767}, 4); + clip.m_path = "test://audio/runtime.wav"; + clip.m_name = "runtime.wav"; + + AudioSourceComponent source; + source.SetClip(&clip); + source.SetVolume(0.75f); + source.SetPitch(1.5f); + source.SetPan(-0.25f); + source.SetLooping(true); + source.SetSpatialize(true); + source.SetHRTFEnabled(true); + source.SetHRTFCrossFeed(0.4f); + source.SetHRTFQuality(3); + + XCEngine::Audio::Audio3DParams params; + params.dopplerLevel = 2.0f; + params.speedOfSound = 280.0f; + params.minDistance = 2.0f; + params.maxDistance = 64.0f; + params.panLevel = 0.6f; + params.spread = 0.3f; + params.reverbZoneMix = 0.2f; + source.Set3DParams(params); + + std::stringstream stream; + source.Serialize(stream); + + AudioSourceComponent target; + target.Deserialize(stream); + + EXPECT_EQ(target.GetClip(), nullptr); + EXPECT_EQ(target.GetClipPath(), "test://audio/runtime.wav"); + EXPECT_FLOAT_EQ(target.GetVolume(), 0.75f); + EXPECT_FLOAT_EQ(target.GetPitch(), 1.5f); + EXPECT_FLOAT_EQ(target.GetPan(), -0.25f); + EXPECT_TRUE(target.IsLooping()); + EXPECT_TRUE(target.IsSpatialize()); + EXPECT_TRUE(target.IsHRTFEnabled()); + EXPECT_FLOAT_EQ(target.GetHRTFCrossFeed(), 0.4f); + EXPECT_EQ(target.GetHRTFQuality(), 3u); + EXPECT_FLOAT_EQ(target.GetDopplerLevel(), 2.0f); + EXPECT_FLOAT_EQ(target.Get3DParams().speedOfSound, 280.0f); + EXPECT_FLOAT_EQ(target.Get3DParams().minDistance, 2.0f); + EXPECT_FLOAT_EQ(target.Get3DParams().maxDistance, 64.0f); + EXPECT_FLOAT_EQ(target.Get3DParams().panLevel, 0.6f); + EXPECT_FLOAT_EQ(target.GetSpread(), 0.3f); + EXPECT_FLOAT_EQ(target.GetReverbZoneMix(), 0.2f); +} + +} // namespace diff --git a/tests/Resources/AudioClip/test_audio_clip.cpp b/tests/Resources/AudioClip/test_audio_clip.cpp index ce20a02c..fc6ecd50 100644 --- a/tests/Resources/AudioClip/test_audio_clip.cpp +++ b/tests/Resources/AudioClip/test_audio_clip.cpp @@ -7,6 +7,21 @@ using namespace XCEngine::Resources; namespace { +XCEngine::Containers::Array EncodeMono16PCM(std::initializer_list samples) { + XCEngine::Containers::Array pcmData; + pcmData.ResizeUninitialized(samples.size() * sizeof(int16_t)); + + size_t byteOffset = 0; + for (const int16_t sample : samples) { + const uint16_t encoded = static_cast(sample); + pcmData[byteOffset++] = static_cast(encoded & 0xFFu); + pcmData[byteOffset++] = static_cast((encoded >> 8) & 0xFFu); + } + + return pcmData; +} + + TEST(AudioClip, GetType) { AudioClip audio; EXPECT_EQ(audio.GetType(), ResourceType::AudioClip); @@ -17,4 +32,43 @@ TEST(AudioClip, GetMemorySize) { EXPECT_EQ(audio.GetMemorySize(), 0u); } +TEST(AudioClip, DefaultPCMStatsAreZero) { + AudioClip audio; + EXPECT_EQ(audio.GetPCMDataSize(), 0u); + EXPECT_EQ(audio.GetFrameCount(), 0u); + EXPECT_EQ(audio.GetSampleCount(), 0u); +} + +TEST(AudioClip, DecodedPCMCacheConverts16BitSamplesAndReusesBuffer) { + AudioClip audio; + audio.SetSampleRate(4); + audio.SetChannels(1); + audio.SetBitsPerSample(16); + audio.SetPCMData(EncodeMono16PCM({32767, -32768, 16384})); + + const std::vector& firstDecode = audio.GetDecodedPCMData(); + const std::vector& secondDecode = audio.GetDecodedPCMData(); + + ASSERT_EQ(firstDecode.size(), 3u); + EXPECT_NEAR(firstDecode[0], 32767.0f / 32768.0f, 1e-5f); + EXPECT_NEAR(firstDecode[1], -1.0f, 1e-5f); + EXPECT_NEAR(firstDecode[2], 16384.0f / 32768.0f, 1e-5f); + EXPECT_EQ(firstDecode.data(), secondDecode.data()); +} + +TEST(AudioClip, UpdatingSampleRateRefreshesDurationWithoutReplacingPCMData) { + AudioClip audio; + audio.SetChannels(1); + audio.SetBitsPerSample(16); + audio.SetPCMData(EncodeMono16PCM({1, 2, 3, 4})); + + audio.SetSampleRate(4); + EXPECT_FLOAT_EQ(audio.GetDuration(), 1.0f); + + audio.SetSampleRate(8); + EXPECT_FLOAT_EQ(audio.GetDuration(), 0.5f); + EXPECT_EQ(audio.GetFrameCount(), 4u); + EXPECT_EQ(audio.GetSampleCount(), 4u); +} + } // namespace