TTS: Implement TextToSpeechManager for macOS

2025-02-04 01:46:42 +00:00 · 2019-07-28 14:56:38 +01:00 · 2019-07-28 14:56:38 +01:00 · 0434419b31
commit 0434419b31
parent e965df1e88
5 changed files with 293 additions and 0 deletions
--- a/backends/module.mk
+++ b/backends/module.mk
@ -352,6 +352,10 @@ ifdef USE_WINDOWS_TTS
 MODULE_OBJS += \
 	text-to-speech/windows/windows-text-to-speech.o
 endif
+ifdef USE_MACOSX_TTS
+MODULE_OBJS += \
+	text-to-speech/macosx/macosx-text-to-speech.o
+endif

 # Include common rules
 include $(srcdir)/rules.mk
--- a/backends/platform/sdl/macosx/macosx.cpp
+++ b/backends/platform/sdl/macosx/macosx.cpp
@ -32,6 +32,7 @@
 #include "backends/platform/sdl/macosx/macosx.h"
 #include "backends/updates/macosx/macosx-updates.h"
 #include "backends/taskbar/macosx/macosx-taskbar.h"
+#include "backends/text-to-speech/macosx/macosx-text-to-speech.h"
 #include "backends/dialogs/macosx/macosx-dialogs.h"
 #include "backends/platform/sdl/macosx/macosx_wrapper.h"
 #include "backends/fs/posix/posix-fs.h"
@ -86,6 +87,11 @@ void OSystem_MacOSX::initBackend() {
 	_updateManager = new MacOSXUpdateManager();
 #endif

+#ifdef USE_MACOSX_TTS
+	// Initialize Text to Speech manager
+	_textToSpeechManager = new MacOSXTextToSpeechManager();
+#endif
+
 	// Invoke parent implementation of this method
 	OSystem_POSIX::initBackend();
 }
--- a/backends/text-to-speech/macosx/macosx-text-to-speech.h
+++ b/backends/text-to-speech/macosx/macosx-text-to-speech.h
@ -0,0 +1,67 @@
+/* ScummVM - Graphic Adventure Engine
+ *
+ * ScummVM is the legal property of its developers, whose names
+ * are too numerous to list here. Please refer to the COPYRIGHT
+ * file distributed with this source distribution.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ */
+
+#ifndef BACKENDS_TEXT_TO_SPEECH_MACOSX_H
+#define BACKENDS_TEXT_TO_SPEECH_MACOSX_H
+
+#include "common/scummsys.h"
+
+#if defined(USE_MACOSX_TTS)
+
+#include "common/text-to-speech.h"
+#include "common/str.h"
+
+/**
+ * Text-to-speech manager for macOS, implemented on top of NSSpeechSynthesizer.
+ */
+class MacOSXTextToSpeechManager : public Common::TextToSpeechManager {
+public:
+	MacOSXTextToSpeechManager();
+	virtual ~MacOSXTextToSpeechManager();
+
+	/**
+	 * Starts speaking str. charset names the encoding of str; when it is empty
+	 * the current translation charset is used if USE_TRANSLATION is defined,
+	 * otherwise the text is treated as ASCII.
+	 */
+	virtual bool say(Common::String str, Common::String charset = "");
+
+	/** Asks the synthesizer to stop speaking. Always returns true. */
+	virtual bool stop();
+	/** Asks the synthesizer to pause immediately. Always returns true. */
+	virtual bool pause();
+	/** Asks the synthesizer to continue speaking. Always returns true. */
+	virtual bool resume();
+
+	/** Returns what the synthesizer reports for isSpeaking. */
+	virtual bool isSpeaking();
+	/** True when the synthesizer status reports the output as both busy and paused. */
+	virtual bool isPaused();
+	/** True when the synthesizer status does not report the output as busy. */
+	virtual bool isReady();
+
+	/**
+	 * Selects the voice at the given index of the available voices and re-applies
+	 * the current pitch and rate. Does nothing when no voice is available.
+	 */
+	virtual void setVoice(unsigned index);
+
+	/** Sets the rate; -100..+100 is mapped to a 0.5..1.5 multiplier on the synthesizer rate. */
+	virtual void setRate(int rate);
+
+	/** Sets the pitch; -100..+100 is mapped to a 0.5..1.5 multiplier on the base pitch. */
+	virtual void setPitch(int pitch);
+
+	/** Sets the volume; the synthesizer volume becomes volume / 100. */
+	virtual void setVolume(unsigned volume);
+
+	/** Stores the language and rebuilds the list of available voices. */
+	virtual void setLanguage(Common::String language);
+
+	/** Releases the NSString voice identifier stored as the data of a voice. */
+	virtual void freeVoiceData(void *data);
+
+private:
+	/** Rebuilds the list of voices whose locale identifier starts with the current language. */
+	virtual void updateVoices();
+};
+
+#endif
+
+#endif // BACKENDS_TEXT_TO_SPEECH_MACOSX_H
+
--- a/backends/text-to-speech/macosx/macosx-text-to-speech.mm
+++ b/backends/text-to-speech/macosx/macosx-text-to-speech.mm
@ -0,0 +1,206 @@
+/* ScummVM - Graphic Adventure Engine
+ *
+ * ScummVM is the legal property of its developers, whose names
+ * are too numerous to list here. Please refer to the COPYRIGHT
+ * file distributed with this source distribution.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ */
+
+// Disable symbol overrides so that we can use system headers.
+#define FORBIDDEN_SYMBOL_ALLOW_ALL
+
+#include "backends/text-to-speech/macosx/macosx-text-to-speech.h"
+
+#if defined(USE_MACOSX_TTS)
+#include "common/translation.h"
+#include <AppKit/NSSpeechSynthesizer.h>
+#include <Foundation/NSString.h>
+#include <CoreFoundation/CFString.h>
+
+// Speech synthesizer used by every MacOSXTextToSpeechManager method in this file.
+// It is a file-scope global (not a member), so it is shared by all manager
+// instances; it is created in the constructor and released in the destructor.
+NSSpeechSynthesizer* synthesizer;
+
+/**
+ * Creates the speech synthesizer and selects the initial language: the current
+ * translation language when USE_TRANSLATION is defined, "en" otherwise.
+ */
+MacOSXTextToSpeechManager::MacOSXTextToSpeechManager() : Common::TextToSpeechManager() {
+	synthesizer = [[NSSpeechSynthesizer alloc] init];
+
+#ifdef USE_TRANSLATION
+	Common::String initialLanguage = TransMan.getCurrentLanguage();
+#else
+	Common::String initialLanguage = "en";
+#endif
+	// setLanguage() also refreshes the voice list, so the synthesizer must exist first.
+	setLanguage(initialLanguage);
+}
+
+/**
+ * Releases the speech synthesizer created by the constructor.
+ */
+MacOSXTextToSpeechManager::~MacOSXTextToSpeechManager() {
+	[synthesizer release];
+	// The synthesizer is a file-scope global: clear it so that it does not
+	// dangle after the release (messages sent to nil are harmless no-ops).
+	synthesizer = nil;
+}
+
+/**
+ * Starts speaking the given text with the synthesizer.
+ *
+ * @param text     The bytes to speak, encoded as described by encoding.
+ * @param encoding IANA charset name for text. When empty, the current
+ *                 translation charset is used if USE_TRANSLATION is defined.
+ *                 When no usable charset is known, the text is decoded as ASCII.
+ * @return The result of -startSpeakingString:, or false when the text cannot
+ *         be converted to a string in the selected encoding.
+ */
+bool MacOSXTextToSpeechManager::say(Common::String text, Common::String encoding) {
+	if (encoding.empty()) {
+#ifdef USE_TRANSLATION
+		encoding = TransMan.getCurrentCharset();
+#endif
+	}
+
+	// Get current encoding
+	CFStringEncoding stringEncoding = kCFStringEncodingASCII;
+	if (!encoding.empty()) {
+		// The creation fails (NULL) if the charset name itself is not plain ASCII.
+		CFStringRef encStr = CFStringCreateWithCString(NULL, encoding.c_str(), kCFStringEncodingASCII);
+		if (encStr != NULL) {
+			stringEncoding = CFStringConvertIANACharSetNameToEncoding(encStr);
+			CFRelease(encStr);
+		}
+		// Unknown charset name: fall back to the same default as when none is given.
+		if (stringEncoding == kCFStringEncodingInvalidId)
+			stringEncoding = kCFStringEncodingASCII;
+	}
+
+	CFStringRef textNSString = CFStringCreateWithCString(NULL, text.c_str(), stringEncoding);
+	// NULL means the bytes are not valid in that encoding. CFRelease() must
+	// not be called with NULL, so report the failure instead.
+	if (textNSString == NULL)
+		return false;
+
+	bool status = [synthesizer startSpeakingString:(NSString *)textNSString];
+	CFRelease(textNSString);
+	return status;
+}
+
+/**
+ * Asks the synthesizer to stop speaking.
+ *
+ * @return Always true.
+ */
+bool MacOSXTextToSpeechManager::stop() {
+	[synthesizer stopSpeaking];
+	return true;
+}
+
+/**
+ * Asks the synthesizer to pause, using the immediate boundary.
+ *
+ * @return Always true.
+ */
+bool MacOSXTextToSpeechManager::pause() {
+	// Should we use NSSpeechWordBoundary, or even NSSpeechSentenceBoundary?
+	[synthesizer pauseSpeakingAtBoundary:NSSpeechImmediateBoundary];
+	return true;
+}
+
+/**
+ * Asks the synthesizer to continue speaking.
+ *
+ * @return Always true.
+ */
+bool MacOSXTextToSpeechManager::resume() {
+	[synthesizer continueSpeaking];
+	return true;
+}
+
+/**
+ * @return Whatever the synthesizer reports for -isSpeaking.
+ */
+bool MacOSXTextToSpeechManager::isSpeaking() {
+	return [synthesizer isSpeaking];
+}
+
+/**
+ * @return True when the synthesizer status reports the output as both busy
+ *         and paused.
+ */
+bool MacOSXTextToSpeechManager::isPaused() {
+	NSDictionary *status = (NSDictionary *)[synthesizer objectForProperty:NSSpeechStatusProperty error:nil];
+
+	// Not busy means there is nothing that could be paused.
+	if (![[status objectForKey:NSSpeechStatusOutputBusy] boolValue])
+		return false;
+
+	return [[status objectForKey:NSSpeechStatusOutputPaused] boolValue];
+}
+
+/**
+ * @return True when the synthesizer status does not report the output as busy.
+ */
+bool MacOSXTextToSpeechManager::isReady() {
+	NSDictionary *status = (NSDictionary *)[synthesizer objectForProperty:NSSpeechStatusProperty error:nil];
+	NSNumber *outputBusy = [status objectForKey:NSSpeechStatusOutputBusy];
+	return ![outputBusy boolValue];
+}
+
+/**
+ * Makes the voice at the given index of the available voices the active one.
+ * Does nothing when no voice is available; asserts that index is in range.
+ */
+void MacOSXTextToSpeechManager::setVoice(unsigned index) {
+	if (_ttsState->_availableVoices.empty())
+		return;
+
+	assert(index < _ttsState->_availableVoices.size());
+	Common::TTSVoice selected = _ttsState->_availableVoices[index];
+	_ttsState->_activeVoice = index;
+
+	NSString *voiceId = (NSString*)selected.getData();
+	[synthesizer setVoice:voiceId];
+
+	// Setting the voice resets the pitch and rate to the voice defaults.
+	// Zero the stored modifiers first so that setPitch() and setRate() scale
+	// from those defaults, then apply the previous modifiers again.
+	int savedPitch = getPitch();
+	int savedRate = getRate();
+	Common::TextToSpeechManager::setPitch(0);
+	Common::TextToSpeechManager::setRate(0);
+	setPitch(savedPitch);
+	setRate(savedRate);
+}
+
+/**
+ * Stores the new rate and rescales the synthesizer rate accordingly.
+ *
+ * The rate is a value between -100 and +100, with 0 being the default rate.
+ * It is converted to a multiplier between 0.5 and 1.5; the multiplier of the
+ * previous rate is divided out before the new one is applied.
+ */
+void MacOSXTextToSpeechManager::setRate(int rate) {
+	int previousRate = getRate();
+	Common::TextToSpeechManager::setRate(rate);
+
+	float previousMultiplier = 1.0f + previousRate / 200.0f;
+	float newMultiplier = 1.0f + rate / 200.0f;
+	[synthesizer setRate:[synthesizer rate] / previousMultiplier * newMultiplier];
+}
+
+/**
+ * Stores the new pitch and rescales the synthesizer base pitch accordingly.
+ *
+ * The pitch is a value between -100 and +100, with 0 being the default pitch.
+ * It is converted to a multiplier between 0.5 and 1.5; the multiplier of the
+ * previous pitch is divided out of the base pitch before the new one is applied.
+ */
+void MacOSXTextToSpeechManager::setPitch(int pitch) {
+	int previousPitch = getPitch();
+	Common::TextToSpeechManager::setPitch(pitch);
+
+	float previousMultiplier = 1.0f + previousPitch / 200.0f;
+	float newMultiplier = 1.0f + pitch / 200.0f;
+
+	NSNumber *currentBase = [synthesizer objectForProperty:NSSpeechPitchBaseProperty error:nil];
+	float newBase = [currentBase floatValue] / previousMultiplier * newMultiplier;
+	NSNumber *newBaseNumber = [NSNumber numberWithFloat:newBase];
+	[synthesizer setObject:newBaseNumber forProperty:NSSpeechPitchBaseProperty error:nil];
+}
+
+/**
+ * Stores the new volume and sets the synthesizer volume to volume / 100.
+ */
+void MacOSXTextToSpeechManager::setVolume(unsigned volume) {
+	Common::TextToSpeechManager::setVolume(volume);
+	[synthesizer setVolume:volume / 100.0f];
+}
+
+/**
+ * Stores the new language in the base class, then rebuilds the list of
+ * available voices for it.
+ */
+void MacOSXTextToSpeechManager::setLanguage(Common::String language) {
+	Common::TextToSpeechManager::setLanguage(language);
+	updateVoices();
+}
+
+/**
+ * Releases the data of a voice. The data is treated as the NSString voice
+ * identifier that updateVoices() allocates for each voice.
+ */
+void MacOSXTextToSpeechManager::freeVoiceData(void *data) {
+	[(NSString*)data release];
+}
+
+/**
+ * Converts an NSString to a Common::String. A nil string, or one for which
+ * -UTF8String yields NULL, becomes an empty string.
+ */
+static Common::String macTTSStringFromNSString(NSString *str) {
+	const char *utf8 = [str UTF8String];
+	return Common::String(utf8 != NULL ? utf8 : "");
+}
+
+/**
+ * Rebuilds the list of available voices from the system voices whose locale
+ * identifier starts with the current language, then selects a voice: the one
+ * with the same name as the previously active voice if it is still listed,
+ * else the system default voice if it is listed, else the first one.
+ */
+void MacOSXTextToSpeechManager::updateVoices() {
+	// Remember the active voice by name so that it can be found again below.
+	Common::String currentVoice;
+	if (!_ttsState->_availableVoices.empty())
+		currentVoice = _ttsState->_availableVoices[_ttsState->_activeVoice].getDescription();
+	_ttsState->_availableVoices.clear();
+	int activeVoiceIndex = -1, defaultVoiceIndex = -1;
+
+	Common::String lang = getLanguage();
+	NSArray *voices = [NSSpeechSynthesizer availableVoices];
+	NSString *defaultVoice = [NSSpeechSynthesizer defaultVoice];
+	int voiceIndex = 0;
+	for (NSString *voiceId in voices) {
+		NSDictionary *voiceAttr = [NSSpeechSynthesizer attributesForVoice:voiceId];
+		// An attribute may be missing from the dictionary; never pass a NULL
+		// C string to Common::String.
+		Common::String voiceLocale = macTTSStringFromNSString([voiceAttr objectForKey:NSVoiceLocaleIdentifier]);
+		if (voiceLocale.hasPrefix(lang)) {
+			// Released again by freeVoiceData().
+			NSString *data = [[NSString alloc] initWithString:voiceId];
+			Common::String name = macTTSStringFromNSString([voiceAttr objectForKey:NSVoiceName]);
+			Common::TTSVoice::Gender gender = Common::TTSVoice::UNKNOWN_GENDER;
+			NSString *voiceGender = [voiceAttr objectForKey:NSVoiceGender];
+			if (voiceGender != nil) {
+				// This can be VoiceGenderMale, VoiceGenderFemale, VoiceGenderNeuter
+				if ([voiceGender isEqualToString:@"VoiceGenderMale"])
+					gender = Common::TTSVoice::MALE;
+				else if ([voiceGender isEqualToString:@"VoiceGenderFemale"])
+					gender = Common::TTSVoice::FEMALE;
+			}
+			Common::TTSVoice::Age age = Common::TTSVoice::UNKNOWN_AGE;
+			NSNumber *voiceAge = [voiceAttr objectForKey:NSVoiceAge];
+			if (voiceAge != nil) {
+				if ([voiceAge integerValue] < 18)
+					age = Common::TTSVoice::CHILD;
+				else
+					age = Common::TTSVoice::ADULT;
+			}
+			Common::TTSVoice voice(gender, age, data, name);
+			_ttsState->_availableVoices.push_back(voice);
+			if (name == currentVoice)
+				activeVoiceIndex = voiceIndex;
+			if (defaultVoice != nil && [defaultVoice isEqualToString:voiceId])
+				defaultVoiceIndex = voiceIndex;
+			++voiceIndex;
+		}
+	}
+
+	// setVoice() returns early when the list is empty, so index 0 is a safe fallback.
+	if (activeVoiceIndex == -1)
+		activeVoiceIndex = defaultVoiceIndex == -1 ? 0 : defaultVoiceIndex;
+	setVoice(activeVoiceIndex);
+}
+
+
+#endif
--- a/10
+++ b/10
@ -169,6 +169,7 @@ _iconv=auto
 _tts=auto
 _linux_tts=no
 _windows_tts=no
+_macosx_tts=no
 # Default option behavior yes/no
 _debug_build=auto
 _release_build=auto
@ -4224,6 +4225,9 @@ int main(void) { return 0; }
 EOF
 			cc_check -lspeechd && _tts=yes
 			;;
+		darwin*)
+			_tts=yes
+			;;
 	esac
 fi
 echo "$_tts"
@ -5414,6 +5418,12 @@ else
 		define_in_config_if_yes $_windows_tts 'USE_WINDOWS_TTS'
 		append_var LIBS '-lsapi -lole32'
 		;;
+	darwin*)
+		echo "osx"
+		_tts=yes
+		_macosx_tts=yes
+		define_in_config_if_yes $_macosx_tts 'USE_MACOSX_TTS'
+		;;
 	*)
 		echo "no"
 		_tts=no