From 27b0373658e4a08e0cafc3bb64296d4c1e0cd441 Mon Sep 17 00:00:00 2001
From: a <e>
Date: Mon, 3 Nov 2025 00:17:43 +0200
Subject: [PATCH 1/3] rewrite voice chat

---
 dll/dll/voicechat.h |  78 ++++++--
 dll/steam_user.cpp  |  25 ++-
 dll/voicechat.cpp   | 475 ++++++++++++++++++++++++++++----------------
 3 files changed, 387 insertions(+), 191 deletions(-)
diff --git a/dll/dll/voicechat.h b/dll/dll/voicechat.h
index f4a2934f..b2826dc0 100644
--- a/dll/dll/voicechat.h
+++ b/dll/dll/voicechat.h
@@ -22,43 +22,75 @@
 #include <opus/opus.h>
 #include <portaudio.h>
 
+// recording: how many mic samples are recorded in 1 second
+// playback: how many samples (per channel) are played back in 1 second
 #define SAMPLE_RATE 48000
-#define CHANNELS 1
-#define FRAME_SIZE 960 // 20ms @ 48kHz
-#define MAX_ENCODED_SIZE 4000
-#define MAX_DECODED_SIZE (FRAME_SIZE * 2 * sizeof(int16_t)) // for stereo
-#define DEFAULT_BITRATE 32000
+// mic (recording) channels, Steam only supports mono voice data
+// https://partner.steamgames.com/doc/api/ISteamUser#DecompressVoice
+// "The output data is raw single-channel 16-bit PCM audio. The decoder supports any sample rate from 11025 to 48000"
+#define CHANNELS_RECORDING 1
+// stereo output
+#define CHANNELS_PLAYBACK 2
+// https://partner.steamgames.com/doc/api/ISteamUser#GetVoice
+// "It is recommended that you pass in an 8 kilobytes or larger destination buffer for compressed audio"
+#define MAX_ENCODED_SIZE 8192
+// how many mic samples to buffer (internally by Port Audio) before firing our mic callback
+// >>> sample time = (1/48000)s ~= 0.0208ms
+// >>> 20ms (desired callback period) / 0.0208ms (sample time) = 960 frames
+#define FRAME_SIZE 960
+// https://opus-codec.org/docs/html_api/group__opusdecoder.html#ga1a8b923c1041ad4976ceada237e117ba
+// "[out] 	pcm 	opus_int16*: Output signal (interleaved if 2 channels). length is frame_size*channels*sizeof(opus_int16)"
+// "[in] 	frame_size 	Number of samples per channel of available space in *pcm, if less than the maximum frame size (120ms) some frames can not be decoded"
+// so we have to account for the worst case scenario which is a max of 120ms frame size
+// >>> sample time = (1/48000)s ~= 0.0208ms
+// >>> 120ms (worst case frame duration) / 0.0208ms (sample time) = 5760 frames
+// >>> 5760 frames (worst case) / 960 frames (our case) = 6
+#define MAX_FRAME_SIZE (FRAME_SIZE * 6)
+#define MAX_DECODED_RECORDING_SIZE (MAX_FRAME_SIZE * CHANNELS_RECORDING)
+#define MAX_DECODED_PLAYBACK_SIZE  (MAX_FRAME_SIZE * CHANNELS_PLAYBACK)
 
 struct VoicePacket {
-    uint64_t userId;
+    uint64_t userId = 0;
     std::vector<uint8_t> encoded;
 };
 
 class VoiceChat
 {
+    // is PortAudio lib initialized
+    std::atomic<bool> isSystemInited{ false };
+
+    // --- recording
     std::atomic<bool> isRecording{ false };
-    std::atomic<bool> isPlaying{ false };
-
-    std::mutex inputMutex;
-    std::condition_variable inputCond;
+    std::recursive_mutex inputMutex;
     std::queue<std::vector<uint8_t>> encodedQueue;
-
-    std::mutex playbackQueueMutex;
-
-    std::queue<VoicePacket> playbackQueue;
-
-    std::mutex decoderMapMutex;
-    std::unordered_map<uint64_t, OpusDecoder*> decoderMap;
-
     OpusEncoder* encoder = nullptr;
     PaStream* inputStream = nullptr;
+    // --- recording
+
+    // --- playback
+    std::atomic<bool> isPlaying{ false };
+    std::recursive_mutex playbackQueueMutex;
+    std::queue<VoicePacket> playbackQueue;
+    std::recursive_mutex decoderMapMutex;
+    std::unordered_map<uint64_t, OpusDecoder*> decoderMap; // TODO do we need a decoder for each user?
     PaStream* outputStream = nullptr;
+    // --- playback
+
+    void cleanupVoiceRecordingInternal();
+    void cleanupPlaybackInternal();
+
+    // recording callback
     static int inputCallback(const void* input, void*, unsigned long frameCount,
         const PaStreamCallbackTimeInfo*, PaStreamCallbackFlags, void*);
+
+    // playback callback
     static int outputCallback(const void*, void* output, unsigned long frameCount,
         const PaStreamCallbackTimeInfo*, PaStreamCallbackFlags, void*);
 
 public:
+    VoiceChat() = default;
+    ~VoiceChat();
+
     bool InitVoiceSystem();
 
     void ShutdownVoiceSystem();
@@ -79,7 +111,13 @@ public:
         void* pDestBuffer, uint32_t cbDestBufferSize, uint32_t* nBytesWritten,
         uint32_t nDesiredSampleRate);
 
-    void QueueIncomingVoice(uint64_t userId, const uint8_t* data, size_t len);
+    void QueueAudioPlayback(uint64_t userId, const uint8_t* data, size_t len);
+
+    bool IsVoiceSystemInitialized() const;
+    
+    bool IsRecordingActive() const;
+    
+    bool IsPlaybackActive() const;
 };
 
-#endif // VOICECHAT_INCLUDE_H
+#endif // VOICECHAT_INCLUDE_H
diff --git a/dll/steam_user.cpp b/dll/steam_user.cpp
index ceb7c0ca..f52369ee 100644
--- a/dll/steam_user.cpp
+++ b/dll/steam_user.cpp
@@ -36,6 +36,7 @@ Steam_User::Steam_User(Settings *settings, Local_Storage *local_storage, class N
 Steam_User::~Steam_User()
 {
     delete auth_manager;
+    delete voicechat;
 }
 
 // returns the HSteamUser this interface represents
@@ -495,8 +496,13 @@ bool Steam_User::GetUserDataFolder( char *pchBuffer, int cubBuffer )
 // Starts voice recording. Once started, use GetVoice() to get the data
 void Steam_User::StartVoiceRecording( )
 {
-    PRINT_DEBUG_ENTRY();
-    voicechat->StartVoiceRecording();
+    if (!voicechat->IsRecordingActive()) {
+        PRINT_DEBUG_ENTRY();
+
+        if (voicechat->InitVoiceSystem()) {
+            voicechat->StartVoiceRecording();
+        }
+    }
 }
 
 // Stops voice recording. Because people often release push-to-talk keys early, the system will keep recording for
@@ -515,6 +521,12 @@ void Steam_User::StopVoiceRecording( )
 EVoiceResult Steam_User::GetAvailableVoice( uint32 *pcbCompressed, uint32 *pcbUncompressed_Deprecated, uint32 nUncompressedVoiceDesiredSampleRate_Deprecated  )
 {
     PRINT_DEBUG_ENTRY();
+
+    if (pcbCompressed) *pcbCompressed = 0;
+    if (pcbUncompressed_Deprecated) *pcbUncompressed_Deprecated = 0;
+
+    // some games like appid 34330 don't call this
+    StartVoiceRecording();
     return voicechat->GetAvailableVoice(pcbCompressed);
 }
 
@@ -548,6 +560,13 @@ EVoiceResult Steam_User::GetAvailableVoice(uint32 *pcbCompressed, uint32 *pcbUnc
 EVoiceResult Steam_User::GetVoice( bool bWantCompressed, void *pDestBuffer, uint32 cbDestBufferSize, uint32 *nBytesWritten, bool bWantUncompressed_Deprecated, void *pUncompressedDestBuffer_Deprecated , uint32 cbUncompressedDestBufferSize_Deprecated , uint32 *nUncompressBytesWritten_Deprecated , uint32 nUncompressedVoiceDesiredSampleRate_Deprecated  )
 {
     PRINT_DEBUG_ENTRY();
+    if (nBytesWritten) *nBytesWritten = 0;
+    if (nUncompressBytesWritten_Deprecated) *nUncompressBytesWritten_Deprecated = 0;
+
+    // should we have this here ? -detanup
+    // some games might not initialize this.
+    // example appid 34330
+    StartVoiceRecording();
     return voicechat->GetVoice(bWantCompressed, pDestBuffer, cbDestBufferSize, nBytesWritten);
 }
 
@@ -597,7 +616,7 @@ EVoiceResult Steam_User::DecompressVoice( void *pCompressed, uint32 cbCompressed
 uint32 Steam_User::GetVoiceOptimalSampleRate()
 {
     PRINT_DEBUG_ENTRY();
-    return 48000;
+    return SAMPLE_RATE;
 }
 
 // Retrieve ticket to be sent to the entity who wishes to authenticate you. 
diff --git a/dll/voicechat.cpp b/dll/voicechat.cpp
index 12927a60..3e110fd0 100644
--- a/dll/voicechat.cpp
+++ b/dll/voicechat.cpp
@@ -1,110 +1,214 @@
 #include "dll/voicechat.h"
 
-static std::atomic<bool> isInited{ false };
+
+void VoiceChat::cleanupVoiceRecordingInternal()
+{
+    if (inputStream) {
+        Pa_AbortStream(inputStream);
+        Pa_CloseStream(inputStream);
+        inputStream = nullptr;
+        PRINT_DEBUG("Closed input stream");
+    }
+
+    if (encoder) {
+        opus_encoder_destroy(encoder);
+        encoder = nullptr;
+        PRINT_DEBUG("Destroyed input encoder");
+    }
+
+    // this must be in a local scope (even without the lock)
+    // so that the swapped/old buffer gets destroyed
+    {
+        std::lock_guard lock(inputMutex);
+
+        std::queue<std::vector<uint8_t>> empty{};
+        std::swap(encodedQueue, empty);
+    }
+
+    isRecording = false;
+}
+
+void VoiceChat::cleanupPlaybackInternal()
+{
+    if (outputStream) {
+        Pa_AbortStream(outputStream);
+        Pa_CloseStream(outputStream);
+        outputStream = nullptr;
+        PRINT_DEBUG("Closed output stream");
+    }
+
+    {
+        std::lock_guard lock(decoderMapMutex);
+        for (auto& [id, decoder] : decoderMap) {
+            if (decoder) {
+                opus_decoder_destroy(decoder);
+            }
+        }
+        decoderMap.clear();
+    }
+
+    // this must be in a local scope (even without the lock)
+    // so that the swapped/old buffer gets destroyed
+    {
+        std::lock_guard lock(playbackQueueMutex);
+
+        std::queue<VoicePacket> empty{};
+        std::swap(playbackQueue, empty);
+    }
+
+    isPlaying = false;
+}
+
+// https://www.portaudio.com/docs/v19-doxydocs/paex__record_8c_source.html
+int VoiceChat::inputCallback(const void* input, void*, unsigned long frameCount,
+    const PaStreamCallbackTimeInfo*, PaStreamCallbackFlags, void* data) {
+    auto self_ref = reinterpret_cast<VoiceChat*>(data);
+    if (!input || !self_ref->isRecording) return paContinue;
+
+    std::vector<uint8_t> encoded(MAX_ENCODED_SIZE);
+    int len = opus_encode(self_ref->encoder, reinterpret_cast<const int16_t*>(input), frameCount,
+        encoded.data(), encoded.size());
+    if (len > 0) {
+        encoded.resize(len);
+        {
+            std::lock_guard lock(self_ref->inputMutex);
+            self_ref->encodedQueue.emplace(std::move(encoded));
+        }
+    }
+    else {
+        PRINT_DEBUG("[X] Opus encoding failed: %s", opus_strerror(len));
+    }
+    return paContinue;
+}
+
+// playback callback: plays the queued Opus packets one after another into the interleaved stereo 'output',
+// anything that could not be filled with decoded audio stays silent (zeros)
+int VoiceChat::outputCallback(const void*, void* output, unsigned long frameCount /* frames per 1 channel! */,
+    const PaStreamCallbackTimeInfo*, PaStreamCallbackFlags, void* data) {
+    auto self_ref = reinterpret_cast<VoiceChat*>(data);
+    auto out = reinterpret_cast<int16_t*>(output);
+    if (!out) return paContinue;
+
+    // don't rely on being handed a cleared buffer, without this an empty queue would play stale/garbage samples
+    memset(out, 0, frameCount * CHANNELS_PLAYBACK * sizeof(int16_t));
+    // nothing to pull packets from if the stream was opened without a VoiceChat instance as user data
+    if (!self_ref) return paContinue;
+
+    unsigned long remainingFrames = frameCount;
+    while (remainingFrames > 0) {
+        VoicePacket pkt{};
+        {
+            std::lock_guard lock(self_ref->playbackQueueMutex);
+            if (self_ref->playbackQueue.empty()) break;
+
+            pkt = std::move(self_ref->playbackQueue.front());
+            self_ref->playbackQueue.pop();
+        }
+
+        OpusDecoder* decoder = nullptr;
+        {
+            std::lock_guard lock(self_ref->decoderMapMutex);
+            auto it_decoder = self_ref->decoderMap.find(pkt.userId);
+            if (self_ref->decoderMap.end() != it_decoder) {
+                decoder = it_decoder->second;
+            }
+            else {
+                int err = 0;
+                // the decoder must produce the same format the output stream was opened with in StartVoicePlayback()
+                decoder = opus_decoder_create(SAMPLE_RATE, CHANNELS_PLAYBACK, &err);
+                if (err != OPUS_OK || !decoder) {
+                    PRINT_DEBUG("[X] Opus decoder create failed: %s", opus_strerror(err));
+                    continue;
+                }
+
+                self_ref->decoderMap[pkt.userId] = decoder;
+            }
+        }
+
+        auto pcm = std::vector<opus_int16>(MAX_DECODED_PLAYBACK_SIZE);
+        int samplesPerChannel = opus_decode(decoder, (const unsigned char*)pkt.encoded.data(), (int)pkt.encoded.size(),
+            pcm.data(), MAX_FRAME_SIZE, 0);
+        if (samplesPerChannel < 0) {
+            PRINT_DEBUG("[X] Opus decode failed: %s", opus_strerror(samplesPerChannel));
+            break;
+        }
+
+        // a packet longer than the space left is truncated, the extra samples are dropped
+        if ((unsigned long)samplesPerChannel > remainingFrames) {
+            samplesPerChannel = (int)remainingFrames;
+        }
+        // https://opus-codec.org/docs/html_api/group__opusdecoder.html#ga1a8b923c1041ad4976ceada237e117ba
+        // "[out] 	pcm 	opus_int16*: Output signal (interleaved if 2 channels). length is frame_size*channels*sizeof(opus_int16)"
+        memcpy(out, pcm.data(), (size_t)samplesPerChannel * CHANNELS_PLAYBACK * sizeof(opus_int16));
+        remainingFrames -= (unsigned long)samplesPerChannel;
+        out += samplesPerChannel * CHANNELS_PLAYBACK;
+    }
+
+    return paContinue;
+}
+
+
+// --- !!! ------ !!! ------ !!! ------ !!! ------ !!! ---
+// --- !!! ------ !!! ------ !!! ------ !!! ------ !!! ---
+// don't init PortAudio or any other external libraries in the constructor
+// always do lazy initialization, this makes it less likely to encounter
+// a crash because of these external libraries if the current game isn't
+// even using the Steam recording feature
+// --- !!! ------ !!! ------ !!! ------ !!! ------ !!! ---
+// --- !!! ------ !!! ------ !!! ------ !!! ------ !!! ---
+
+VoiceChat::~VoiceChat()
+{
+    cleanupVoiceRecordingInternal();
+    cleanupPlaybackInternal();
+    ShutdownVoiceSystem();
+}
 
 bool VoiceChat::InitVoiceSystem() {
-    if (!isInited) {
-        if (Pa_Initialize() != paNoError) {
-            PRINT_DEBUG("PortAudio initialization failed");
-            return false;
-        }
-        isInited = true;
+    if (isSystemInited) return true;
+
+    PaError paErr = Pa_Initialize();
+    if (paErr != paNoError) {
+        PRINT_DEBUG("[X] PortAudio initialization failed: %s", Pa_GetErrorText(paErr));
+        return false;
     }
-    isRecording = false;
-    isPlaying = false;
-    encoder = nullptr;
-    inputStream = nullptr;
-    outputStream = nullptr;
-    PRINT_DEBUG("VoiceSystem initialized!");
+
+    isSystemInited = true;
+    PRINT_DEBUG("Successfully initialized VoiceSystem!");
     return true;
 }
 
 void VoiceChat::ShutdownVoiceSystem() {
-    if (isInited) {
-        Pa_Terminate();
-        isInited = false;
-        PRINT_DEBUG("VoiceSystem Terminated!");
-    }
-}
+    if (!isSystemInited.exchange(false)) return;
 
-int VoiceChat::inputCallback(const void* input, void*, unsigned long frameCount,
-    const PaStreamCallbackTimeInfo*, PaStreamCallbackFlags, void* data) {
-    VoiceChat* chat = static_cast<VoiceChat*>(data);
-    if (!input || frameCount != FRAME_SIZE || !chat->isRecording.load()) return paContinue;
-
-    std::vector<uint8_t> encoded(MAX_ENCODED_SIZE);
-    int len = opus_encode(chat->encoder, static_cast<const int16_t*>(input), frameCount,
-        encoded.data(), MAX_ENCODED_SIZE);
-    if (len > 0) {
-        encoded.resize(len);
-        {
-            std::lock_guard<std::mutex> lock(chat->inputMutex);
-            chat->encodedQueue.push(std::move(encoded));
-        }
-        chat->inputCond.notify_one();
-    }
-    else {
-        PRINT_DEBUG("Opus encoding failed: %d", len);
-    }
-    return paContinue;
-}
-
-int VoiceChat::outputCallback(const void*, void* output, unsigned long frameCount,
-    const PaStreamCallbackTimeInfo*, PaStreamCallbackFlags, void* data) {
-    VoiceChat* chat = static_cast<VoiceChat*>(data);
-    int16_t* out = static_cast<int16_t*>(output);
-    memset(out, 0, frameCount * sizeof(int16_t) * 2); // support stereo output
-
-    std::lock_guard<std::mutex> lock(chat->playbackQueueMutex);
-    size_t mixCount = 0;
-
-    while (!chat->playbackQueue.empty()) {
-        VoicePacket pkt = chat->playbackQueue.front();
-        chat->playbackQueue.pop();
-
-        OpusDecoder* decoder = nullptr;
-        {
-            std::lock_guard<std::mutex> dlock(chat->decoderMapMutex);
-            decoder = chat->decoderMap[pkt.userId];
-            if (!decoder) {
-                int err = 0;
-                decoder = opus_decoder_create(SAMPLE_RATE, CHANNELS, &err);
-                if (err != OPUS_OK || !decoder) continue;
-                chat->decoderMap[pkt.userId] = decoder;
-            }
-        }
-
-        int16_t tempBuffer[FRAME_SIZE] = { 0 };
-        int decoded = opus_decode(decoder, pkt.encoded.data(), pkt.encoded.size(), tempBuffer, frameCount, 0);
-        if (decoded > 0) {
-            for (int i = 0; i < decoded; ++i) {
-                out[2 * i] += tempBuffer[i] / 2;     // left
-                out[2 * i + 1] += tempBuffer[i] / 2; // right
-            }
-            ++mixCount;
-        }
-    }
-
-    return paContinue;
+    Pa_Terminate();
+    PRINT_DEBUG("VoiceSystem Terminated!");
 }
 
 bool VoiceChat::StartVoiceRecording() {
-    if (isRecording.load()) return true;
-    if (!InitVoiceSystem()) return false;
-
-    int err = 0;
-    encoder = opus_encoder_create(SAMPLE_RATE, CHANNELS, OPUS_APPLICATION_VOIP, &err);
-    if (!encoder || err != OPUS_OK) {
-        PRINT_DEBUG("Opus encoder create failed: %d", err);
+    if (isRecording) return true;
+    if (!isSystemInited) {
+        PRINT_DEBUG("[X] VoiceSystem not initialized");
         return false;
     }
 
-    opus_encoder_ctl(encoder, OPUS_SET_BITRATE(DEFAULT_BITRATE));
+    int err = 0;
+    encoder = opus_encoder_create(SAMPLE_RATE, CHANNELS_RECORDING, OPUS_APPLICATION_VOIP, &err);
+    if (!encoder || err != OPUS_OK) {
+        PRINT_DEBUG("[X] Opus encoder create failed: %s", opus_strerror(err));
+        cleanupVoiceRecordingInternal();
+        return false;
+    }
 
     PaStreamParameters params{};
     params.device = Pa_GetDefaultInputDevice();
-    if (params.device == paNoDevice) return false;
-    params.channelCount = CHANNELS;
+    if (params.device == paNoDevice) {
+        PRINT_DEBUG("[X] Pa_GetDefaultInputDevice failed (no device)");
+        cleanupVoiceRecordingInternal();
+        return false;
+    }
+
+    params.channelCount = CHANNELS_RECORDING;
     params.sampleFormat = paInt16;
     params.suggestedLatency = Pa_GetDeviceInfo(params.device)->defaultLowInputLatency;
     params.hostApiSpecificStreamInfo = nullptr;
@@ -112,37 +216,46 @@ bool VoiceChat::StartVoiceRecording() {
     PaError paErr = Pa_OpenStream(&inputStream, &params, nullptr, SAMPLE_RATE, FRAME_SIZE,
         paClipOff, inputCallback, this);
     if (paErr != paNoError) {
-        PRINT_DEBUG("Failed to open input stream: %s", Pa_GetErrorText(paErr));
+        PRINT_DEBUG("[X] Failed to open input stream: %s", Pa_GetErrorText(paErr));
+        cleanupVoiceRecordingInternal();
         return false;
     }
 
-    isRecording.store(true);
-    Pa_StartStream(inputStream);
+    paErr = Pa_StartStream(inputStream);
+    if (paErr != paNoError) {
+        PRINT_DEBUG("[X] Failed to start input stream: %s", Pa_GetErrorText(paErr));
+        cleanupVoiceRecordingInternal();
+        return false;
+    }
+
+    isRecording = true;
+    PRINT_DEBUG("Successfully started recording!");
     return true;
 }
 
 void VoiceChat::StopVoiceRecording() {
     if (!isRecording.exchange(false)) return;
-    if (inputStream) {
-        Pa_StopStream(inputStream);
-        Pa_CloseStream(inputStream);
-        inputStream = nullptr;
-    }
-    if (encoder) {
-        opus_encoder_destroy(encoder);
-        encoder = nullptr;
-    }
-    ShutdownVoiceSystem();
+
+    PRINT_DEBUG_ENTRY();
+    cleanupVoiceRecordingInternal();
 }
 
 bool VoiceChat::StartVoicePlayback() {
-    if (isPlaying.load()) return true;
-    if (!InitVoiceSystem()) return false;
+    if (isPlaying) return true;
+    if (!isSystemInited) {
+        PRINT_DEBUG("[X] VoiceSystem not initialized");
+        return false;
+    }
 
     PaStreamParameters params{};
     params.device = Pa_GetDefaultOutputDevice();
-    if (params.device == paNoDevice) return false;
-    params.channelCount = 2; // stereo output
+    if (params.device == paNoDevice) {
+        PRINT_DEBUG("[X] Pa_GetDefaultOutputDevice failed (no device)");
+        cleanupPlaybackInternal();
+        return false;
+    }
+
+    params.channelCount = CHANNELS_PLAYBACK;
     params.sampleFormat = paInt16;
     params.suggestedLatency = Pa_GetDeviceInfo(params.device)->defaultLowOutputLatency;
     params.hostApiSpecificStreamInfo = nullptr;
@@ -150,37 +263,40 @@ bool VoiceChat::StartVoicePlayback() {
     PaError paErr = Pa_OpenStream(&outputStream, nullptr, &params, SAMPLE_RATE, FRAME_SIZE,
         paClipOff, outputCallback, nullptr);
     if (paErr != paNoError) {
-        PRINT_DEBUG("Failed to open output stream: %s", Pa_GetErrorText(paErr));
+        PRINT_DEBUG("[X] Failed to open output stream: %s", Pa_GetErrorText(paErr));
+        cleanupPlaybackInternal();
         return false;
     }
 
-    isPlaying.store(true);
-    Pa_StartStream(outputStream);
+    paErr = Pa_StartStream(outputStream);
+    if (paErr != paNoError) {
+        PRINT_DEBUG("[X] Failed to start output stream: %s", Pa_GetErrorText(paErr));
+        cleanupPlaybackInternal();
+        return false;
+    }
+
+    isPlaying = true;
+    PRINT_DEBUG("Successfully started playback!");
     return true;
 }
 
 void VoiceChat::StopVoicePlayback() {
     if (!isPlaying.exchange(false)) return;
-    if (outputStream) {
-        Pa_StopStream(outputStream);
-        Pa_CloseStream(outputStream);
-        outputStream = nullptr;
-    }
 
-    std::lock_guard<std::mutex> lock(decoderMapMutex);
-    for (auto& [id, decoder] : decoderMap) {
-        opus_decoder_destroy(decoder);
-    }
-    decoderMap.clear();
-
-    ShutdownVoiceSystem();
+    PRINT_DEBUG_ENTRY();
+    cleanupPlaybackInternal();
 }
 
 EVoiceResult VoiceChat::GetAvailableVoice(uint32_t* pcbCompressed) {
-    if (!pcbCompressed) return k_EVoiceResultNotInitialized;
-    std::lock_guard<std::mutex> lock(inputMutex);
+    // init this early since some games completely ignore the return result and use this
+    if (pcbCompressed) *pcbCompressed = 0;
+
+    if (!isSystemInited) return k_EVoiceResultNotInitialized;
+    if (!isRecording) return k_EVoiceResultNotRecording;
+    if (!pcbCompressed) return k_EVoiceResultBufferTooSmall;
+
+    std::lock_guard lock(inputMutex);
 
-    if (!isRecording.load()) return k_EVoiceResultNotRecording;
     if (encodedQueue.empty()) return k_EVoiceResultNoData;
 
     *pcbCompressed = static_cast<uint32_t>(encodedQueue.front().size());
@@ -188,78 +304,101 @@ EVoiceResult VoiceChat::GetAvailableVoice(uint32_t* pcbCompressed) {
 }
 
 EVoiceResult VoiceChat::GetVoice(bool bWantCompressed, void* pDestBuffer, uint32_t cbDestBufferSize, uint32_t* nBytesWritten) {
-    if (!pDestBuffer || !nBytesWritten) return k_EVoiceResultNotInitialized;
+    // init this early since some games completely ignore the return result and use this
+    if (nBytesWritten) *nBytesWritten = 0;
 
-    // if we does not recording dont do anything.
-    if (isRecording.load()) return k_EVoiceResultNotRecording;
+    if (!isSystemInited) return k_EVoiceResultNotInitialized;
+    if (!isRecording) return k_EVoiceResultNotRecording;
+    if (!pDestBuffer || !nBytesWritten) return k_EVoiceResultBufferTooSmall;
 
-    // should we have this here ? -detanup
-    // some games might not initialize this. (?? FUCKING WHY? )
-    if (!InitVoiceSystem()) return k_EVoiceResultNotInitialized;
-
-    std::unique_lock<std::mutex> lock(inputMutex);
-    inputCond.wait_for(lock, std::chrono::milliseconds(20), [this] {
-        return !this->encodedQueue.empty();
-        });
+    std::lock_guard lock(inputMutex);
 
     if (encodedQueue.empty()) return k_EVoiceResultNoData;
 
-    auto buf = std::move(encodedQueue.front());
-    encodedQueue.pop();
-    lock.unlock();
+    auto& encodedVoice = encodedQueue.front();
 
+    EVoiceResult ret = k_EVoiceResultOK;
     if (bWantCompressed) {
-        if (cbDestBufferSize < buf.size()) return k_EVoiceResultBufferTooSmall;
-        memcpy(pDestBuffer, buf.data(), buf.size());
-        *nBytesWritten = static_cast<uint32_t>(buf.size());
-        return k_EVoiceResultOK;
+        if (cbDestBufferSize < encodedVoice.size()) {
+            ret = k_EVoiceResultBufferTooSmall;
+        }
+        else {
+            memcpy(pDestBuffer, encodedVoice.data(), encodedVoice.size());
+            *nBytesWritten = static_cast<uint32_t>(encodedVoice.size());
+        }
     }
     else {
-        int err;
-        OpusDecoder* tempDecoder = opus_decoder_create(SAMPLE_RATE, CHANNELS, &err);
-        if (!tempDecoder || err != OPUS_OK) return k_EVoiceResultNotInitialized;
-
-        int16_t* pcm = static_cast<int16_t*>(pDestBuffer);
-        int samples = opus_decode(tempDecoder, buf.data(), static_cast<opus_int32>(buf.size()), pcm, FRAME_SIZE, 0);
-        opus_decoder_destroy(tempDecoder);
-
-        if (samples < 0) return k_EVoiceResultNotInitialized;
-
-        uint32_t requiredSize = samples * CHANNELS * sizeof(int16_t);
-        if (cbDestBufferSize < requiredSize) return k_EVoiceResultBufferTooSmall;
-
-        *nBytesWritten = requiredSize;
-        return k_EVoiceResultOK;
+        ret = DecompressVoice(reinterpret_cast<const void*>(encodedVoice.data()), (uint32_t)encodedVoice.size(),
+            pDestBuffer, cbDestBufferSize, nBytesWritten, SAMPLE_RATE);
     }
+
+    if (k_EVoiceResultOK == ret) {
+        encodedQueue.pop();
+    }
+    return ret;
 }
 
 EVoiceResult VoiceChat::DecompressVoice(const void* pCompressed, uint32_t cbCompressed,
     void* pDestBuffer, uint32_t cbDestBufferSize, uint32_t* nBytesWritten,
     uint32_t nDesiredSampleRate) {
-    if (!pCompressed || !pDestBuffer || !nBytesWritten) return k_EVoiceResultNotInitialized;
+    // init this early since some games completely ignore the return result and use this
+    if (nBytesWritten) *nBytesWritten = 0;
 
-    int err;
-    OpusDecoder* tempDecoder = opus_decoder_create(nDesiredSampleRate, CHANNELS, &err);
-    if (!tempDecoder || err != OPUS_OK) return k_EVoiceResultNotInitialized;
+    if (!pCompressed || !cbCompressed) return k_EVoiceResultNoData;
 
-    int16_t* pcm = static_cast<int16_t*>(pDestBuffer);
-    int samples = opus_decode(tempDecoder, static_cast<const uint8_t*>(pCompressed), cbCompressed, pcm, FRAME_SIZE, 0);
+    int err{};
+    // we must decompress using the same parameters used in StartVoiceRecording() when creating the encoder
+    // so 'nDesiredSampleRate' is ignored on purpose here
+    OpusDecoder* tempDecoder = opus_decoder_create(SAMPLE_RATE, CHANNELS_RECORDING, &err);
+    if (!tempDecoder || err != OPUS_OK) {
+        PRINT_DEBUG("[X] Opus decoder create failed: %s", opus_strerror(err));
+        return k_EVoiceResultDataCorrupted;
+    }
+
+    auto pcm = std::vector<opus_int16>(MAX_DECODED_RECORDING_SIZE);
+    int samplesPerChannel = opus_decode(tempDecoder, static_cast<const unsigned char*>(pCompressed), (int)cbCompressed,
+        pcm.data(), MAX_FRAME_SIZE, 0);
     opus_decoder_destroy(tempDecoder);
 
-    if (samples < 0) return k_EVoiceResultNotInitialized;
+    if (samplesPerChannel < 0) {
+        PRINT_DEBUG("[X] Opus decode failed: %s", opus_strerror(samplesPerChannel));
+        return k_EVoiceResultDataCorrupted;
+    }
 
-    uint32_t bytesRequired = samples * CHANNELS * sizeof(int16_t);
-    if (cbDestBufferSize < bytesRequired) return k_EVoiceResultBufferTooSmall;
+    // https://opus-codec.org/docs/html_api/group__opusdecoder.html#ga1a8b923c1041ad4976ceada237e117ba
+    // "[out] 	pcm 	opus_int16*: Output signal (interleaved if 2 channels). length is frame_size*channels*sizeof(opus_int16)"
+    uint32_t bytesRequired = samplesPerChannel * CHANNELS_RECORDING * sizeof(opus_int16);
+    // https://partner.steamgames.com/doc/api/ISteamUser#DecompressVoice
+    // "nBytesWritten: Returns the number of bytes written to pDestBuffer,
+    // or size of the buffer required to decompress the given data
+    // if cbDestBufferSize is not large enough (and k_EVoiceResultBufferTooSmall is returned)."
+    if (nBytesWritten) *nBytesWritten = bytesRequired;
+    if (!pDestBuffer || cbDestBufferSize < bytesRequired) return k_EVoiceResultBufferTooSmall;
 
-    *nBytesWritten = bytesRequired;
+    memcpy(pDestBuffer, pcm.data(), bytesRequired);
     return k_EVoiceResultOK;
 }
 
 // Called externally (e.g., from network thread) to enqueue received voice
 // We usually dont need this since it actually sends the voice data by SteamNetworking (or other) with GetVoice && DecompressVoice
-void VoiceChat::QueueIncomingVoice(uint64_t userId, const uint8_t* data, size_t len) {
+void VoiceChat::QueueAudioPlayback(uint64_t userId, const uint8_t* data, size_t len) {
     if (!data || len == 0) return;
-    std::lock_guard<std::mutex> lock(playbackQueueMutex);
+
+    std::lock_guard lock(playbackQueueMutex);
     playbackQueue.push({ userId, std::vector<uint8_t>(data, data + len) });
 }
 
+bool VoiceChat::IsVoiceSystemInitialized() const
+{
+    return isSystemInited;
+}
+
+bool VoiceChat::IsRecordingActive() const
+{
+    return isRecording;
+}
+
+bool VoiceChat::IsPlaybackActive() const
+{
+    return isPlaying;
+}

From c71f660f6a4b2bbce553c0ff362ee8e37faee738 Mon Sep 17 00:00:00 2001
From: a <e>
Date: Mon, 3 Nov 2025 00:33:14 +0200
Subject: [PATCH 2/3] add feature flag to voice chat

---
 dll/dll/settings.h                                       | 3 +++
 dll/settings_parser.cpp                                  | 3 +++
 dll/steam_user.cpp                                       | 6 ++++++
 .../steam_settings.EXAMPLE/configs.main.EXAMPLE.ini      | 9 +++++++++
 4 files changed, 21 insertions(+)

diff --git a/dll/dll/settings.h b/dll/dll/settings.h
index a7c07fc4..ccdf2305 100644
--- a/dll/dll/settings.h
+++ b/dll/dll/settings.h
@@ -369,6 +369,9 @@ public:
     // free weekend
     bool free_weekend = false;
 
+    // voice chat
+    bool enable_voice_chat = false;
+
 
 #ifdef LOBBY_CONNECT
     static constexpr const bool is_lobby_connect = true;
diff --git a/dll/settings_parser.cpp b/dll/settings_parser.cpp
index f2b9b745..8b04045c 100644
--- a/dll/settings_parser.cpp
+++ b/dll/settings_parser.cpp
@@ -1505,6 +1505,9 @@ static void parse_simple_features(class Settings *settings_client, class Setting
     settings_client->disable_account_avatar = !ini.GetBoolValue("main::general", "enable_account_avatar", !settings_client->disable_account_avatar);
     settings_server->disable_account_avatar = !ini.GetBoolValue("main::general", "enable_account_avatar", !settings_server->disable_account_avatar);
 
+    settings_client->enable_voice_chat = ini.GetBoolValue("main::general", "enable_voice_chat", settings_client->enable_voice_chat);
+    settings_server->enable_voice_chat = ini.GetBoolValue("main::general", "enable_voice_chat", settings_server->enable_voice_chat);
+
     settings_client->steam_deck = ini.GetBoolValue("main::general", "steam_deck", settings_client->steam_deck);
     settings_server->steam_deck = ini.GetBoolValue("main::general", "steam_deck", settings_server->steam_deck);
 
diff --git a/dll/steam_user.cpp b/dll/steam_user.cpp
index f52369ee..36fe5605 100644
--- a/dll/steam_user.cpp
+++ b/dll/steam_user.cpp
@@ -496,6 +496,8 @@ bool Steam_User::GetUserDataFolder( char *pchBuffer, int cubBuffer )
 // Starts voice recording. Once started, use GetVoice() to get the data
 void Steam_User::StartVoiceRecording( )
 {
+    if (!settings->enable_voice_chat) return;
+
     if (!voicechat->IsRecordingActive()) {
         PRINT_DEBUG_ENTRY();
 
@@ -511,6 +513,8 @@ void Steam_User::StartVoiceRecording( )
 void Steam_User::StopVoiceRecording( )
 {
     PRINT_DEBUG_ENTRY();
+    if (!settings->enable_voice_chat) return;
+
     voicechat->StopVoiceRecording();
 }
 
@@ -524,6 +528,7 @@ EVoiceResult Steam_User::GetAvailableVoice( uint32 *pcbCompressed, uint32 *pcbUn
 
     if (pcbCompressed) *pcbCompressed = 0;
     if (pcbUncompressed_Deprecated) *pcbUncompressed_Deprecated = 0;
+    if (!settings->enable_voice_chat) return k_EVoiceResultNoData;
 
     // some games like appid 34330 don't call this
     StartVoiceRecording();
@@ -562,6 +567,7 @@ EVoiceResult Steam_User::GetVoice( bool bWantCompressed, void *pDestBuffer, uint
     PRINT_DEBUG_ENTRY();
     if (nBytesWritten) *nBytesWritten = 0;
     if (nUncompressBytesWritten_Deprecated) *nUncompressBytesWritten_Deprecated = 0;
+    if (!settings->enable_voice_chat) return k_EVoiceResultNoData;
 
     // should we have this here ? -detanup
     // some games might not initialize this.
diff --git a/post_build/steam_settings.EXAMPLE/configs.main.EXAMPLE.ini b/post_build/steam_settings.EXAMPLE/configs.main.EXAMPLE.ini
index ae77f8c0..d92030b1 100644
--- a/post_build/steam_settings.EXAMPLE/configs.main.EXAMPLE.ini
+++ b/post_build/steam_settings.EXAMPLE/configs.main.EXAMPLE.ini
@@ -18,6 +18,15 @@ steam_deck=0
 # 1=enable avatar functionality
 # default=0
 enable_account_avatar=0
+# 1=enable the experimental voice chat feature
+# ----------------------------
+# XXXXXXXXXXXXXXXXXXXXXXXXXXXX
+# XXX USE AT YOUR OWN RISK XXX
+# XXXXXXXXXXXXXXXXXXXXXXXXXXXX
+# ----------------------------
+# this may increase system resource usage, degrade performance, or cause crashes
+# default=0
+enable_voice_chat=0
 # 1=synchronize user stats/achievements with game servers as soon as possible instead of caching them until the next call to `Steam_RunCallbacks()`
 # not recommended to enable this
 # default=0

From eef0a39580430f903a9427ecd640b390d031c29e Mon Sep 17 00:00:00 2001
From: a <e>
Date: Mon, 3 Nov 2025 01:32:55 +0200
Subject: [PATCH 3/3] print debug voice functions

---
 dll/voicechat.cpp | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)

diff --git a/dll/voicechat.cpp b/dll/voicechat.cpp
index 3e110fd0..dee4ae12 100644
--- a/dll/voicechat.cpp
+++ b/dll/voicechat.cpp
@@ -299,7 +299,9 @@ EVoiceResult VoiceChat::GetAvailableVoice(uint32_t* pcbCompressed) {
 
     if (encodedQueue.empty()) return k_EVoiceResultNoData;
 
-    *pcbCompressed = static_cast<uint32_t>(encodedQueue.front().size());
+    auto availableBytes = static_cast<uint32_t>(encodedQueue.front().size());
+    *pcbCompressed = availableBytes;
+    PRINT_DEBUG("available %u bytes of voice data", availableBytes);
     return k_EVoiceResultOK;
 }
 
@@ -318,22 +320,29 @@ EVoiceResult VoiceChat::GetVoice(bool bWantCompressed, void* pDestBuffer, uint32
     auto& encodedVoice = encodedQueue.front();
 
     EVoiceResult ret = k_EVoiceResultOK;
+    uint32_t actualWrittenBytes = 0;
     if (bWantCompressed) {
         if (cbDestBufferSize < encodedVoice.size()) {
             ret = k_EVoiceResultBufferTooSmall;
         }
         else {
             memcpy(pDestBuffer, encodedVoice.data(), encodedVoice.size());
-            *nBytesWritten = static_cast<uint32_t>(encodedVoice.size());
+            actualWrittenBytes = static_cast<uint32_t>(encodedVoice.size());
         }
     }
     else {
         ret = DecompressVoice(reinterpret_cast<const void*>(encodedVoice.data()), (uint32_t)encodedVoice.size(),
-            pDestBuffer, cbDestBufferSize, nBytesWritten, SAMPLE_RATE);
+            pDestBuffer, cbDestBufferSize, &actualWrittenBytes, SAMPLE_RATE);
     }
 
+    *nBytesWritten = actualWrittenBytes;
+
     if (k_EVoiceResultOK == ret) {
         encodedQueue.pop();
+        PRINT_DEBUG("returned %u bytes of voice data", actualWrittenBytes);
+    }
+    else {
+        PRINT_DEBUG("[X] Failed to get voice data <%i>", ret);
     }
     return ret;
 }
@@ -368,6 +377,7 @@ EVoiceResult VoiceChat::DecompressVoice(const void* pCompressed, uint32_t cbComp
     // https://opus-codec.org/docs/html_api/group__opusdecoder.html#ga1a8b923c1041ad4976ceada237e117ba
     // "[out] 	pcm 	opus_int16*: Output signal (interleaved if 2 channels). length is frame_size*channels*sizeof(opus_int16)"
     uint32_t bytesRequired = samplesPerChannel * CHANNELS_RECORDING * sizeof(opus_int16);
+    PRINT_DEBUG("required=%u bytes, buffer size=%u bytes", bytesRequired, cbDestBufferSize);
     // https://partner.steamgames.com/doc/api/ISteamUser#DecompressVoice
     // "nBytesWritten: Returns the number of bytes written to pDestBuffer,
     // or size of the buffer required to decompress the given data