Merge pull request #372 from otavepto/patch/opt-voice

Fix performance regression caused by voice chat and disable the feature by default
2026-02-04 05:41:18 +01:00 · 2025-11-03 14:39:12 +01:00
parent db0bc4cd8b eef0a39580
commit c533321eb7
6 changed files with 419 additions and 192 deletions
--- a/dll/dll/settings.h
+++ b/dll/dll/settings.h
@@ -369,6 +369,9 @@ public:
    // free weekend
    bool free_weekend = false;

+    // voice chat
+    bool enable_voice_chat = false;
+

 #ifdef LOBBY_CONNECT
    static constexpr const bool is_lobby_connect = true;
--- a/dll/dll/voicechat.h
+++ b/dll/dll/voicechat.h
@@ -22,43 +22,75 @@
 #include <opus/opus.h>
 #include <portaudio.h>

+// recording: how many mic samples are captured per second
+// playback: how many samples per channel are played back per second
 #define SAMPLE_RATE 48000
-#define CHANNELS 1
-#define FRAME_SIZE 960 // 20ms @ 48kHz
-#define MAX_ENCODED_SIZE 4000
-#define MAX_DECODED_SIZE (FRAME_SIZE * 2 * sizeof(int16_t)) // for stereo
-#define DEFAULT_BITRATE 32000
+// mic/playback channels, steam only support mono mic channels
+// https://partner.steamgames.com/doc/api/ISteamUser#DecompressVoice
+// "The output data is raw single-channel 16-bit PCM audio. The decoder supports any sample rate from 11025 to 48000"
+#define CHANNELS_RECORDING 1
+// stereo output
+#define CHANNELS_PLAYBACK 2
+// https://partner.steamgames.com/doc/api/ISteamUser#GetVoice
+// "It is recommended that you pass in an 8 kilobytes or larger destination buffer for compressed audio"
+#define MAX_ENCODED_SIZE 8192
+// how many mic samples to buffer (internally by Port Audio) before firing our mic callback
+// >>> sample time = (1/48000) s = ~0.0208ms
+// >>> 20ms (desired callback rate) / ~0.0208ms (sample time) = 960 frames
+#define FRAME_SIZE 960
+// https://opus-codec.org/docs/html_api/group__opusdecoder.html#ga1a8b923c1041ad4976ceada237e117ba
+// "[out] 	pcm 	opus_int16*: Output signal (interleaved if 2 channels). length is frame_size*channels*sizeof(opus_int16)"
+// "[in] 	frame_size 	Number of samples per channel of available space in *pcm, if less than the maximum frame size (120ms) some frames can not be decoded"
+// so we have to account for the worst case scenario which is a max of 120ms frame size
+// >>> sample time = (1/48000) s = ~0.0208ms
+// >>> 120ms (worst case frame duration) / ~0.0208ms (sample time) = 5760 frames
+// >>> 5760 frames (worst case) / 960 frames (our case) = 6
+#define MAX_FRAME_SIZE (FRAME_SIZE * 6)
+#define MAX_DECODED_RECORDING_SIZE (MAX_FRAME_SIZE * CHANNELS_RECORDING)
+#define MAX_DECODED_PLAYBACK_SIZE  (MAX_FRAME_SIZE * CHANNELS_PLAYBACK)

 struct VoicePacket {
-    uint64_t userId;
+    uint64_t userId = 0;
    std::vector<uint8_t> encoded;
 };

 class VoiceChat
 {
+    // is PortAudio lib initialized
+    std::atomic<bool> isSystemInited{ false };
+
+    // --- recording
    std::atomic<bool> isRecording{ false };
-    std::atomic<bool> isPlaying{ false };
-
-    std::mutex inputMutex;
-    std::condition_variable inputCond;
+    std::recursive_mutex inputMutex;
    std::queue<std::vector<uint8_t>> encodedQueue;
-
-    std::mutex playbackQueueMutex;
-
-    std::queue<VoicePacket> playbackQueue;
-
-    std::mutex decoderMapMutex;
-    std::unordered_map<uint64_t, OpusDecoder*> decoderMap;
-
    OpusEncoder* encoder = nullptr;
    PaStream* inputStream = nullptr;
+    // --- recording
+
+    // --- playback
+    std::atomic<bool> isPlaying{ false };
+    std::recursive_mutex playbackQueueMutex;
+    std::queue<VoicePacket> playbackQueue;
+    std::recursive_mutex decoderMapMutex;
+    std::unordered_map<uint64_t, OpusDecoder*> decoderMap; // TODO do we need a decoder for each user?
    PaStream* outputStream = nullptr;
+    // --- playback
+
+    void cleanupVoiceRecordingInternal();
+    void cleanupPlaybackInternal();
+
+    // recording callback
    static int inputCallback(const void* input, void*, unsigned long frameCount,
        const PaStreamCallbackTimeInfo*, PaStreamCallbackFlags, void*);
+
+    // playback callback
    static int outputCallback(const void*, void* output, unsigned long frameCount,
        const PaStreamCallbackTimeInfo*, PaStreamCallbackFlags, void*);

 public:
+    VoiceChat() = default;
+    ~VoiceChat();
+
    bool InitVoiceSystem();

    void ShutdownVoiceSystem();
@@ -79,7 +111,13 @@ public:
        void* pDestBuffer, uint32_t cbDestBufferSize, uint32_t* nBytesWritten,
        uint32_t nDesiredSampleRate);

-    void QueueIncomingVoice(uint64_t userId, const uint8_t* data, size_t len);
+    void QueueAudioPlayback(uint64_t userId, const uint8_t* data, size_t len);
+
+    bool IsVoiceSystemInitialized() const;
+    
+    bool IsRecordingActive() const;
+    
+    bool IsPlaybackActive() const;
 };

-#endif // VOICECHAT_INCLUDE_H
+#endif // VOICECHAT_INCLUDE_H
--- a/dll/settings_parser.cpp
+++ b/dll/settings_parser.cpp
@@ -1505,6 +1505,9 @@ static void parse_simple_features(class Settings *settings_client, class Setting
    settings_client->disable_account_avatar = !ini.GetBoolValue("main::general", "enable_account_avatar", !settings_client->disable_account_avatar);
    settings_server->disable_account_avatar = !ini.GetBoolValue("main::general", "enable_account_avatar", !settings_server->disable_account_avatar);

+    settings_client->enable_voice_chat = ini.GetBoolValue("main::general", "enable_voice_chat", settings_client->enable_voice_chat);
+    settings_server->enable_voice_chat = ini.GetBoolValue("main::general", "enable_voice_chat", settings_server->enable_voice_chat);
+
    settings_client->steam_deck = ini.GetBoolValue("main::general", "steam_deck", settings_client->steam_deck);
    settings_server->steam_deck = ini.GetBoolValue("main::general", "steam_deck", settings_server->steam_deck);

--- a/dll/steam_user.cpp
+++ b/dll/steam_user.cpp
@@ -36,6 +36,7 @@ Steam_User::Steam_User(Settings *settings, Local_Storage *local_storage, class N
 Steam_User::~Steam_User()
 {
    delete auth_manager;
+    delete voicechat;
 }

 // returns the HSteamUser this interface represents
@@ -495,8 +496,15 @@ bool Steam_User::GetUserDataFolder( char *pchBuffer, int cubBuffer )
 // Starts voice recording. Once started, use GetVoice() to get the data
 void Steam_User::StartVoiceRecording( )
 {
-    PRINT_DEBUG_ENTRY();
-    voicechat->StartVoiceRecording();
+    if (!settings->enable_voice_chat) return;
+
+    if (!voicechat->IsRecordingActive()) {
+        PRINT_DEBUG_ENTRY();
+
+        if (voicechat->InitVoiceSystem()) {
+            voicechat->StartVoiceRecording();
+        }
+    }
 }

 // Stops voice recording. Because people often release push-to-talk keys early, the system will keep recording for
@@ -505,6 +513,8 @@ void Steam_User::StartVoiceRecording( )
 void Steam_User::StopVoiceRecording( )
 {
    PRINT_DEBUG_ENTRY();
+    if (!settings->enable_voice_chat) return;
+
    voicechat->StopVoiceRecording();
 }

@@ -515,6 +525,13 @@ void Steam_User::StopVoiceRecording( )
 EVoiceResult Steam_User::GetAvailableVoice( uint32 *pcbCompressed, uint32 *pcbUncompressed_Deprecated, uint32 nUncompressedVoiceDesiredSampleRate_Deprecated  )
 {
    PRINT_DEBUG_ENTRY();
+
+    if (pcbCompressed) *pcbCompressed = 0;
+    if (pcbUncompressed_Deprecated) *pcbUncompressed_Deprecated = 0;
+    if (!settings->enable_voice_chat) return k_EVoiceResultNoData;
+
+    // some games like appid 34330 don't call this
+    StartVoiceRecording();
    return voicechat->GetAvailableVoice(pcbCompressed);
 }

@@ -548,6 +565,14 @@ EVoiceResult Steam_User::GetAvailableVoice(uint32 *pcbCompressed, uint32 *pcbUnc
 EVoiceResult Steam_User::GetVoice( bool bWantCompressed, void *pDestBuffer, uint32 cbDestBufferSize, uint32 *nBytesWritten, bool bWantUncompressed_Deprecated, void *pUncompressedDestBuffer_Deprecated , uint32 cbUncompressedDestBufferSize_Deprecated , uint32 *nUncompressBytesWritten_Deprecated , uint32 nUncompressedVoiceDesiredSampleRate_Deprecated  )
 {
    PRINT_DEBUG_ENTRY();
+    if (nBytesWritten) *nBytesWritten = 0;
+    if (nUncompressBytesWritten_Deprecated) *nUncompressBytesWritten_Deprecated = 0;
+    if (!settings->enable_voice_chat) return k_EVoiceResultNoData;
+
+    // should we have this here ? -detanup
+    // some games might not initialize this.
+    // example appid 34330
+    StartVoiceRecording();
    return voicechat->GetVoice(bWantCompressed, pDestBuffer, cbDestBufferSize, nBytesWritten);
 }

@@ -597,7 +622,7 @@ EVoiceResult Steam_User::DecompressVoice( void *pCompressed, uint32 cbCompressed
 uint32 Steam_User::GetVoiceOptimalSampleRate()
 {
    PRINT_DEBUG_ENTRY();
-    return 48000;
+    return SAMPLE_RATE;
 }

 // Retrieve ticket to be sent to the entity who wishes to authenticate you. 
--- a/dll/voicechat.cpp
+++ b/dll/voicechat.cpp
@@ -1,110 +1,214 @@
 #include "dll/voicechat.h"

-static std::atomic<bool> isInited{ false };
+
+void VoiceChat::cleanupVoiceRecordingInternal()
+{
+    // the mic stream goes first: its callback (inputCallback) is what uses the encoder destroyed below
+    if (nullptr != inputStream) {
+        Pa_AbortStream(inputStream);
+        Pa_CloseStream(inputStream);
+        inputStream = nullptr;
+        PRINT_DEBUG("Closed input stream");
+    }
+
+    if (nullptr != encoder) {
+        opus_encoder_destroy(encoder);
+        encoder = nullptr;
+        PRINT_DEBUG("Destroyed input encoder");
+    }
+
+    // drop the encoded frames nobody fetched: they are swapped into a scoped temporary
+    // so that their storage is actually freed when this scope ends
+    {
+        std::lock_guard lock(inputMutex);
+        std::queue<std::vector<uint8_t>> drained{};
+        encodedQueue.swap(drained);
+    }
+
+    isRecording = false;
+}
+
+void VoiceChat::cleanupPlaybackInternal()
+{
+    // the speaker stream goes first: its callback (outputCallback) is what uses the decoders destroyed below
+    if (nullptr != outputStream) {
+        Pa_AbortStream(outputStream);
+        Pa_CloseStream(outputStream);
+        outputStream = nullptr;
+        PRINT_DEBUG("Closed output stream");
+    }
+
+    // destroy every per-user decoder, then forget them
+    {
+        std::lock_guard lock(decoderMapMutex);
+        for (auto& entry : decoderMap) {
+            if (nullptr == entry.second) continue;
+            opus_decoder_destroy(entry.second);
+        }
+        decoderMap.clear();
+    }
+
+    // drop the packets still waiting to be played: they are swapped into a scoped temporary
+    // so that their storage is actually freed when this scope ends
+    {
+        std::lock_guard lock(playbackQueueMutex);
+        std::queue<VoicePacket> drained{};
+        playbackQueue.swap(drained);
+    }
+
+    isPlaying = false;
+}
+
+// recording callback, 'data' is the VoiceChat passed to Pa_OpenStream() (see https://www.portaudio.com/docs/v19-doxydocs/paex__record_8c_source.html)
+int VoiceChat::inputCallback(const void* input, void*, unsigned long frameCount,
+    const PaStreamCallbackTimeInfo*, PaStreamCallbackFlags, void* data) {
+    auto self_ref = reinterpret_cast<VoiceChat*>(data);
+    if (!input || !self_ref->isRecording) return paContinue; // ignore buffers until StartVoiceRecording() sets isRecording
+    // scratch buffer of the largest size we accept, shrunk to the real encoded length below
+    std::vector<uint8_t> encoded(MAX_ENCODED_SIZE);
+    int len = opus_encode(self_ref->encoder, reinterpret_cast<const int16_t*>(input), frameCount,
+        encoded.data(), encoded.size());
+    if (len > 0) {
+        encoded.resize(len);
+        {
+            std::lock_guard lock(self_ref->inputMutex);
+            self_ref->encodedQueue.emplace(std::move(encoded));
+        }
+    }
+    else {
+        PRINT_DEBUG("[X] Opus encoding failed: %s", opus_strerror(len));
+    }
+    return paContinue; // keep the stream running even when this buffer could not be encoded
+}
+
+// playback callback: decodes queued Opus packets into 'output' (interleaved int16), anything not filled stays silent
+int VoiceChat::outputCallback(const void*, void* output, unsigned long frameCount /* frames per 1 channel! */,
+    const PaStreamCallbackTimeInfo*, PaStreamCallbackFlags, void* data) {
+    auto self_ref = reinterpret_cast<VoiceChat*>(data);
+    auto out = reinterpret_cast<int16_t*>(output);
+    if (!out) return paContinue;
+
+    // silence first: the queue may be empty, decoding may fail, or the packets may not fill the whole buffer
+    memset(out, 0, frameCount * CHANNELS_PLAYBACK * sizeof(int16_t));
+    // no owner to pull packets from (stream opened without user data): just keep playing silence
+    if (!self_ref) return paContinue;
+
+    unsigned long remainingFrames = frameCount;
+    while (remainingFrames > 0) {
+        VoicePacket pkt{};
+        {
+            std::lock_guard lock(self_ref->playbackQueueMutex);
+            if (self_ref->playbackQueue.empty()) break;
+            pkt = std::move(self_ref->playbackQueue.front());
+            self_ref->playbackQueue.pop();
+        }
+
+        OpusDecoder* decoder = nullptr;
+        {
+            std::lock_guard lock(self_ref->decoderMapMutex);
+            auto it_decoder = self_ref->decoderMap.find(pkt.userId);
+            if (self_ref->decoderMap.end() != it_decoder) {
+                decoder = it_decoder->second;
+            }
+            else {
+                int err = 0;
+                // decode with the sample rate/channel count the output stream is opened with in StartVoicePlayback()
+                decoder = opus_decoder_create(SAMPLE_RATE, CHANNELS_PLAYBACK, &err);
+                if (err != OPUS_OK || !decoder) {
+                    PRINT_DEBUG("[X] Opus decoder create failed: %s", opus_strerror(err));
+                    continue;
+                }
+                self_ref->decoderMap[pkt.userId] = decoder;
+            }
+        }
+
+        auto pcm = std::vector<opus_int16>(MAX_DECODED_PLAYBACK_SIZE);
+        int samplesPerChannel = opus_decode(decoder, (const unsigned char*)pkt.encoded.data(), (int)pkt.encoded.size(),
+            pcm.data(), MAX_FRAME_SIZE, 0);
+        if (samplesPerChannel < 0) {
+            PRINT_DEBUG("[X] Opus decode failed: %s", opus_strerror(samplesPerChannel));
+            break;
+        }
+
+        // a packet longer than the space left is truncated, the excess samples are dropped
+        if ((unsigned long)samplesPerChannel > remainingFrames) {
+            samplesPerChannel = remainingFrames;
+        }
+        // https://opus-codec.org/docs/html_api/group__opusdecoder.html#ga1a8b923c1041ad4976ceada237e117ba
+        // "[out] 	pcm 	opus_int16*: Output signal (interleaved if 2 channels). length is frame_size*channels*sizeof(opus_int16)"
+        uint32_t bytesRequired = samplesPerChannel * CHANNELS_PLAYBACK * sizeof(opus_int16);
+        memcpy(out, pcm.data(), bytesRequired);
+
+        // update the pointers
+        remainingFrames -= (unsigned long)samplesPerChannel;
+        out += samplesPerChannel * CHANNELS_PLAYBACK;
+    }
+
+    return paContinue;
+}
+
+
+// --- !!! ------ !!! ------ !!! ------ !!! ------ !!! ---
+// --- !!! ------ !!! ------ !!! ------ !!! ------ !!! ---
+// don't init PortAudio or any other external libraries in the constructor
+// always do lazy initialization, this makes it less likely to encounter
+// a crash because of these external libraries if the current game isn't
+// even using the Steam recording feature
+// --- !!! ------ !!! ------ !!! ------ !!! ------ !!! ---
+// --- !!! ------ !!! ------ !!! ------ !!! ------ !!! ---
+
+VoiceChat::~VoiceChat()
+{
+    cleanupVoiceRecordingInternal(); // closes the input stream, destroys the encoder, drops queued frames
+    cleanupPlaybackInternal();       // closes the output stream, destroys the decoders, drops queued packets
+    ShutdownVoiceSystem();           // terminates PortAudio last, and only if it was initialized
+}

 bool VoiceChat::InitVoiceSystem() {
-    if (!isInited) {
-        if (Pa_Initialize() != paNoError) {
-            PRINT_DEBUG("PortAudio initialization failed");
-            return false;
-        }
-        isInited = true;
+    if (isSystemInited) return true;
+
+    PaError paErr = Pa_Initialize();
+    if (paErr != paNoError) {
+        PRINT_DEBUG("[X] PortAudio initialization failed: %s", Pa_GetErrorText(paErr));
+        return false;
    }
-    isRecording = false;
-    isPlaying = false;
-    encoder = nullptr;
-    inputStream = nullptr;
-    outputStream = nullptr;
-    PRINT_DEBUG("VoiceSystem initialized!");
+
+    isSystemInited = true;
+    PRINT_DEBUG("Successfully initialized VoiceSystem!");
    return true;
 }

 void VoiceChat::ShutdownVoiceSystem() {
-    if (isInited) {
-        Pa_Terminate();
-        isInited = false;
-        PRINT_DEBUG("VoiceSystem Terminated!");
-    }
-}
+    if (!isSystemInited.exchange(false)) return;

-int VoiceChat::inputCallback(const void* input, void*, unsigned long frameCount,
-    const PaStreamCallbackTimeInfo*, PaStreamCallbackFlags, void* data) {
-    VoiceChat* chat = static_cast<VoiceChat*>(data);
-    if (!input || frameCount != FRAME_SIZE || !chat->isRecording.load()) return paContinue;
-
-    std::vector<uint8_t> encoded(MAX_ENCODED_SIZE);
-    int len = opus_encode(chat->encoder, static_cast<const int16_t*>(input), frameCount,
-        encoded.data(), MAX_ENCODED_SIZE);
-    if (len > 0) {
-        encoded.resize(len);
-        {
-            std::lock_guard<std::mutex> lock(chat->inputMutex);
-            chat->encodedQueue.push(std::move(encoded));
-        }
-        chat->inputCond.notify_one();
-    }
-    else {
-        PRINT_DEBUG("Opus encoding failed: %d", len);
-    }
-    return paContinue;
-}
-
-int VoiceChat::outputCallback(const void*, void* output, unsigned long frameCount,
-    const PaStreamCallbackTimeInfo*, PaStreamCallbackFlags, void* data) {
-    VoiceChat* chat = static_cast<VoiceChat*>(data);
-    int16_t* out = static_cast<int16_t*>(output);
-    memset(out, 0, frameCount * sizeof(int16_t) * 2); // support stereo output
-
-    std::lock_guard<std::mutex> lock(chat->playbackQueueMutex);
-    size_t mixCount = 0;
-
-    while (!chat->playbackQueue.empty()) {
-        VoicePacket pkt = chat->playbackQueue.front();
-        chat->playbackQueue.pop();
-
-        OpusDecoder* decoder = nullptr;
-        {
-            std::lock_guard<std::mutex> dlock(chat->decoderMapMutex);
-            decoder = chat->decoderMap[pkt.userId];
-            if (!decoder) {
-                int err = 0;
-                decoder = opus_decoder_create(SAMPLE_RATE, CHANNELS, &err);
-                if (err != OPUS_OK || !decoder) continue;
-                chat->decoderMap[pkt.userId] = decoder;
-            }
-        }
-
-        int16_t tempBuffer[FRAME_SIZE] = { 0 };
-        int decoded = opus_decode(decoder, pkt.encoded.data(), pkt.encoded.size(), tempBuffer, frameCount, 0);
-        if (decoded > 0) {
-            for (int i = 0; i < decoded; ++i) {
-                out[2 * i] += tempBuffer[i] / 2;     // left
-                out[2 * i + 1] += tempBuffer[i] / 2; // right
-            }
-            ++mixCount;
-        }
-    }
-
-    return paContinue;
+    Pa_Terminate();
+    PRINT_DEBUG("VoiceSystem Terminated!");
 }

 bool VoiceChat::StartVoiceRecording() {
-    if (isRecording.load()) return true;
-    if (!InitVoiceSystem()) return false;
-
-    int err = 0;
-    encoder = opus_encoder_create(SAMPLE_RATE, CHANNELS, OPUS_APPLICATION_VOIP, &err);
-    if (!encoder || err != OPUS_OK) {
-        PRINT_DEBUG("Opus encoder create failed: %d", err);
+    if (isRecording) return true;
+    if (!isSystemInited) {
+        PRINT_DEBUG("[X] VoiceSystem not initialized");
        return false;
    }

-    opus_encoder_ctl(encoder, OPUS_SET_BITRATE(DEFAULT_BITRATE));
+    int err = 0;
+    encoder = opus_encoder_create(SAMPLE_RATE, CHANNELS_RECORDING, OPUS_APPLICATION_VOIP, &err);
+    if (!encoder || err != OPUS_OK) {
+        PRINT_DEBUG("[X] Opus encoder create failed: %s", opus_strerror(err));
+        cleanupVoiceRecordingInternal();
+        return false;
+    }

    PaStreamParameters params{};
    params.device = Pa_GetDefaultInputDevice();
-    if (params.device == paNoDevice) return false;
-    params.channelCount = CHANNELS;
+    if (params.device == paNoDevice) {
+        PRINT_DEBUG("[X] Pa_GetDefaultInputDevice failed (no device)");
+        cleanupVoiceRecordingInternal();
+        return false;
+    }
+
+    params.channelCount = CHANNELS_RECORDING;
    params.sampleFormat = paInt16;
    params.suggestedLatency = Pa_GetDeviceInfo(params.device)->defaultLowInputLatency;
    params.hostApiSpecificStreamInfo = nullptr;
@@ -112,37 +216,46 @@ bool VoiceChat::StartVoiceRecording() {
    PaError paErr = Pa_OpenStream(&inputStream, &params, nullptr, SAMPLE_RATE, FRAME_SIZE,
        paClipOff, inputCallback, this);
    if (paErr != paNoError) {
-        PRINT_DEBUG("Failed to open input stream: %s", Pa_GetErrorText(paErr));
+        PRINT_DEBUG("[X] Failed to open input stream: %s", Pa_GetErrorText(paErr));
+        cleanupVoiceRecordingInternal();
        return false;
    }

-    isRecording.store(true);
-    Pa_StartStream(inputStream);
+    paErr = Pa_StartStream(inputStream);
+    if (paErr != paNoError) {
+        PRINT_DEBUG("[X] Failed to start input stream: %s", Pa_GetErrorText(paErr));
+        cleanupVoiceRecordingInternal();
+        return false;
+    }
+
+    isRecording = true; // from here on inputCallback stops discarding the mic buffers
+    PRINT_DEBUG("Successfully started recording!");
    return true;
 }

 void VoiceChat::StopVoiceRecording() {
    if (!isRecording.exchange(false)) return;
-    if (inputStream) {
-        Pa_StopStream(inputStream);
-        Pa_CloseStream(inputStream);
-        inputStream = nullptr;
-    }
-    if (encoder) {
-        opus_encoder_destroy(encoder);
-        encoder = nullptr;
-    }
-    ShutdownVoiceSystem();
+
+    PRINT_DEBUG_ENTRY();
+    cleanupVoiceRecordingInternal();
 }

 bool VoiceChat::StartVoicePlayback() {
-    if (isPlaying.load()) return true;
-    if (!InitVoiceSystem()) return false;
+    if (isPlaying) return true;
+    if (!isSystemInited) {
+        PRINT_DEBUG("[X] VoiceSystem not initialized");
+        return false;
+    }

    PaStreamParameters params{};
    params.device = Pa_GetDefaultOutputDevice();
-    if (params.device == paNoDevice) return false;
-    params.channelCount = 2; // stereo output
+    if (params.device == paNoDevice) {
+        PRINT_DEBUG("[X] Pa_GetDefaultOutputDevice failed (no device)");
+        cleanupPlaybackInternal();
+        return false;
+    }
+
+    params.channelCount = CHANNELS_PLAYBACK;
    params.sampleFormat = paInt16;
    params.suggestedLatency = Pa_GetDeviceInfo(params.device)->defaultLowOutputLatency;
    params.hostApiSpecificStreamInfo = nullptr;
@@ -150,116 +263,152 @@ bool VoiceChat::StartVoicePlayback() {
    PaError paErr = Pa_OpenStream(&outputStream, nullptr, &params, SAMPLE_RATE, FRAME_SIZE,
-        paClipOff, outputCallback, nullptr);
+        paClipOff, outputCallback, this); // outputCallback casts its user data back to VoiceChat* and dereferences it
    if (paErr != paNoError) {
-        PRINT_DEBUG("Failed to open output stream: %s", Pa_GetErrorText(paErr));
+        PRINT_DEBUG("[X] Failed to open output stream: %s", Pa_GetErrorText(paErr));
+        cleanupPlaybackInternal();
        return false;
    }

-    isPlaying.store(true);
-    Pa_StartStream(outputStream);
+    paErr = Pa_StartStream(outputStream);
+    if (paErr != paNoError) {
+        PRINT_DEBUG("[X] Failed to start output stream: %s", Pa_GetErrorText(paErr));
+        cleanupPlaybackInternal();
+        return false;
+    }
+
+    isPlaying = true;
+    PRINT_DEBUG("Successfully started playback!");
    return true;
 }

 void VoiceChat::StopVoicePlayback() {
    if (!isPlaying.exchange(false)) return;
-    if (outputStream) {
-        Pa_StopStream(outputStream);
-        Pa_CloseStream(outputStream);
-        outputStream = nullptr;
-    }

-    std::lock_guard<std::mutex> lock(decoderMapMutex);
-    for (auto& [id, decoder] : decoderMap) {
-        opus_decoder_destroy(decoder);
-    }
-    decoderMap.clear();
-
-    ShutdownVoiceSystem();
+    PRINT_DEBUG_ENTRY();
+    cleanupPlaybackInternal();
 }

 EVoiceResult VoiceChat::GetAvailableVoice(uint32_t* pcbCompressed) {
-    if (!pcbCompressed) return k_EVoiceResultNotInitialized;
-    std::lock_guard<std::mutex> lock(inputMutex);
+    // init this early since some games completely ignore the return result and use this
+    if (pcbCompressed) *pcbCompressed = 0;
+
+    if (!isSystemInited) return k_EVoiceResultNotInitialized;
+    if (!isRecording) return k_EVoiceResultNotRecording;
+    if (!pcbCompressed) return k_EVoiceResultBufferTooSmall;
+
+    std::lock_guard lock(inputMutex);

-    if (!isRecording.load()) return k_EVoiceResultNotRecording;
    if (encodedQueue.empty()) return k_EVoiceResultNoData;

-    *pcbCompressed = static_cast<uint32_t>(encodedQueue.front().size());
+    auto availableBytes = static_cast<uint32_t>(encodedQueue.front().size());
+    *pcbCompressed = availableBytes;
+    PRINT_DEBUG("available %u bytes of voice data", availableBytes);
    return k_EVoiceResultOK;
 }

 EVoiceResult VoiceChat::GetVoice(bool bWantCompressed, void* pDestBuffer, uint32_t cbDestBufferSize, uint32_t* nBytesWritten) {
-    if (!pDestBuffer || !nBytesWritten) return k_EVoiceResultNotInitialized;
+    // init this early since some games completely ignore the return result and use this
+    if (nBytesWritten) *nBytesWritten = 0;

-    // if we does not recording dont do anything.
-    if (isRecording.load()) return k_EVoiceResultNotRecording;
+    if (!isSystemInited) return k_EVoiceResultNotInitialized;
+    if (!isRecording) return k_EVoiceResultNotRecording;
+    if (!pDestBuffer || !nBytesWritten) return k_EVoiceResultBufferTooSmall;

-    // should we have this here ? -detanup
-    // some games might not initialize this. (?? FUCKING WHY? )
-    if (!InitVoiceSystem()) return k_EVoiceResultNotInitialized;
-
-    std::unique_lock<std::mutex> lock(inputMutex);
-    inputCond.wait_for(lock, std::chrono::milliseconds(20), [this] {
-        return !this->encodedQueue.empty();
-        });
+    std::lock_guard lock(inputMutex);

    if (encodedQueue.empty()) return k_EVoiceResultNoData;

-    auto buf = std::move(encodedQueue.front());
-    encodedQueue.pop();
-    lock.unlock();
+    auto& encodedVoice = encodedQueue.front();

+    EVoiceResult ret = k_EVoiceResultOK;
+    uint32_t actualWrittenBytes = 0;
    if (bWantCompressed) {
-        if (cbDestBufferSize < buf.size()) return k_EVoiceResultBufferTooSmall;
-        memcpy(pDestBuffer, buf.data(), buf.size());
-        *nBytesWritten = static_cast<uint32_t>(buf.size());
-        return k_EVoiceResultOK;
+        if (cbDestBufferSize < encodedVoice.size()) {
+            ret = k_EVoiceResultBufferTooSmall;
+        }
+        else {
+            memcpy(pDestBuffer, encodedVoice.data(), encodedVoice.size());
+            actualWrittenBytes = static_cast<uint32_t>(encodedVoice.size());
+        }
    }
    else {
-        int err;
-        OpusDecoder* tempDecoder = opus_decoder_create(SAMPLE_RATE, CHANNELS, &err);
-        if (!tempDecoder || err != OPUS_OK) return k_EVoiceResultNotInitialized;
-
-        int16_t* pcm = static_cast<int16_t*>(pDestBuffer);
-        int samples = opus_decode(tempDecoder, buf.data(), static_cast<opus_int32>(buf.size()), pcm, FRAME_SIZE, 0);
-        opus_decoder_destroy(tempDecoder);
-
-        if (samples < 0) return k_EVoiceResultNotInitialized;
-
-        uint32_t requiredSize = samples * CHANNELS * sizeof(int16_t);
-        if (cbDestBufferSize < requiredSize) return k_EVoiceResultBufferTooSmall;
-
-        *nBytesWritten = requiredSize;
-        return k_EVoiceResultOK;
+        ret = DecompressVoice(reinterpret_cast<const void*>(encodedVoice.data()), (uint32_t)encodedVoice.size(),
+            pDestBuffer, cbDestBufferSize, &actualWrittenBytes, SAMPLE_RATE);
    }
+
+    *nBytesWritten = actualWrittenBytes;
+
+    if (k_EVoiceResultOK == ret) {
+        encodedQueue.pop();
+        PRINT_DEBUG("returned %u bytes of voice data", actualWrittenBytes);
+    }
+    else {
+        PRINT_DEBUG("[X] Failed to get voice data <%i>", ret);
+    }
+    return ret;
 }

 EVoiceResult VoiceChat::DecompressVoice(const void* pCompressed, uint32_t cbCompressed,
    void* pDestBuffer, uint32_t cbDestBufferSize, uint32_t* nBytesWritten,
    uint32_t nDesiredSampleRate) {
-    if (!pCompressed || !pDestBuffer || !nBytesWritten) return k_EVoiceResultNotInitialized;
+    // init this early since some games completely ignore the return result and use this
+    if (nBytesWritten) *nBytesWritten = 0;

-    int err;
-    OpusDecoder* tempDecoder = opus_decoder_create(nDesiredSampleRate, CHANNELS, &err);
-    if (!tempDecoder || err != OPUS_OK) return k_EVoiceResultNotInitialized;
+    if (!pCompressed || !cbCompressed) return k_EVoiceResultNoData;

-    int16_t* pcm = static_cast<int16_t*>(pDestBuffer);
-    int samples = opus_decode(tempDecoder, static_cast<const uint8_t*>(pCompressed), cbCompressed, pcm, FRAME_SIZE, 0);
+    int err{};
+    // we must decompress using the same parameters used in StartVoiceRecording() when creating the encoder
+    // so 'nDesiredSampleRate' is ignored on purpose here
+    OpusDecoder* tempDecoder = opus_decoder_create(SAMPLE_RATE, CHANNELS_RECORDING, &err);
+    if (!tempDecoder || err != OPUS_OK) {
+        PRINT_DEBUG("[X] Opus decoder create failed: %s", opus_strerror(err));
+        return k_EVoiceResultDataCorrupted;
+    }
+
+    auto pcm = std::vector<opus_int16>(MAX_DECODED_RECORDING_SIZE);
+    int samplesPerChannel = opus_decode(tempDecoder, static_cast<const unsigned char*>(pCompressed), (int)cbCompressed,
+        pcm.data(), MAX_FRAME_SIZE, 0);
    opus_decoder_destroy(tempDecoder);

-    if (samples < 0) return k_EVoiceResultNotInitialized;
+    if (samplesPerChannel < 0) {
+        PRINT_DEBUG("[X] Opus decode failed: %s", opus_strerror(samplesPerChannel));
+        return k_EVoiceResultDataCorrupted;
+    }

-    uint32_t bytesRequired = samples * CHANNELS * sizeof(int16_t);
-    if (cbDestBufferSize < bytesRequired) return k_EVoiceResultBufferTooSmall;
+    // https://opus-codec.org/docs/html_api/group__opusdecoder.html#ga1a8b923c1041ad4976ceada237e117ba
+    // "[out] 	pcm 	opus_int16*: Output signal (interleaved if 2 channels). length is frame_size*channels*sizeof(opus_int16)"
+    uint32_t bytesRequired = samplesPerChannel * CHANNELS_RECORDING * sizeof(opus_int16);
+    PRINT_DEBUG("required=%u bytes, buffer size=%u bytes", bytesRequired, cbDestBufferSize);
+    // https://partner.steamgames.com/doc/api/ISteamUser#DecompressVoice
+    // "nBytesWritten: Returns the number of bytes written to pDestBuffer,
+    // or size of the buffer required to decompress the given data
+    // if cbDestBufferSize is not large enough (and k_EVoiceResultBufferTooSmall is returned)."
+    if (nBytesWritten) *nBytesWritten = bytesRequired;
+    if (!pDestBuffer || cbDestBufferSize < bytesRequired) return k_EVoiceResultBufferTooSmall;

-    *nBytesWritten = bytesRequired;
+    memcpy(pDestBuffer, pcm.data(), bytesRequired);
    return k_EVoiceResultOK;
 }

 // Called externally (e.g., from network thread) to enqueue received voice
 // We usually dont need this since it actually sends the voice data by SteamNetworking (or other) with GetVoice && DecompressVoice
-void VoiceChat::QueueIncomingVoice(uint64_t userId, const uint8_t* data, size_t len) {
+void VoiceChat::QueueAudioPlayback(uint64_t userId, const uint8_t* data, size_t len) {
    if (!data || len == 0) return;
-    std::lock_guard<std::mutex> lock(playbackQueueMutex);
+
+    std::lock_guard lock(playbackQueueMutex);
    playbackQueue.push({ userId, std::vector<uint8_t>(data, data + len) });
 }

+bool VoiceChat::IsVoiceSystemInitialized() const
+{
+    return isSystemInited; // set by InitVoiceSystem(), cleared by ShutdownVoiceSystem()
+}
+
+bool VoiceChat::IsRecordingActive() const
+{
+    return isRecording; // set once StartVoiceRecording() succeeds, cleared when recording is stopped/cleaned up
+}
+
+bool VoiceChat::IsPlaybackActive() const
+{
+    return isPlaying; // set once StartVoicePlayback() succeeds, cleared when playback is stopped/cleaned up
+}
--- a/post_build/steam_settings.EXAMPLE/configs.main.EXAMPLE.ini
+++ b/post_build/steam_settings.EXAMPLE/configs.main.EXAMPLE.ini
@@ -18,6 +18,15 @@ steam_deck=0
 # 1=enable avatar functionality
 # default=0
 enable_account_avatar=0
+# enable the experimental voice chat feature
+# ----------------------------
+# XXXXXXXXXXXXXXXXXXXXXXXXXXXX
+# XXX USE AT YOUR OWN RISK XXX
+# XXXXXXXXXXXXXXXXXXXXXXXXXXXX
+# ----------------------------
+# this may result in higher system usage and cause performance drop, or cause crashes
+# default=0
+enable_voice_chat=0
 # 1=synchronize user stats/achievements with game servers as soon as possible instead of caching them until the next call to `Steam_RunCallbacks()`
 # not recommended to enable this
 # default=0